{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "from sklearn.metrics import r2_score\n",
    "\n",
    "%matplotlib inline\n",
    "\n",
    "# Load the Ames housing training data; the 'Id' column is dropped right after loading.\n",
    "train = pd.read_csv(\"Ames_House_train.csv\", skipinitialspace=True)\n",
    "train = train.drop('Id', axis = 1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>MSZoning</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>Street</th>\n",
       "      <th>Alley</th>\n",
       "      <th>LotShape</th>\n",
       "      <th>LandContour</th>\n",
       "      <th>Utilities</th>\n",
       "      <th>LotConfig</th>\n",
       "      <th>...</th>\n",
       "      <th>PoolArea</th>\n",
       "      <th>PoolQC</th>\n",
       "      <th>Fence</th>\n",
       "      <th>MiscFeature</th>\n",
       "      <th>MiscVal</th>\n",
       "      <th>MoSold</th>\n",
       "      <th>YrSold</th>\n",
       "      <th>SaleType</th>\n",
       "      <th>SaleCondition</th>\n",
       "      <th>SalePrice</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1450</th>\n",
       "      <td>90</td>\n",
       "      <td>RL</td>\n",
       "      <td>60.0</td>\n",
       "      <td>9000</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>FR2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>2009</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>136000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1451</th>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>78.0</td>\n",
       "      <td>9262</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>Inside</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>2009</td>\n",
       "      <td>New</td>\n",
       "      <td>Partial</td>\n",
       "      <td>287090</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1452</th>\n",
       "      <td>180</td>\n",
       "      <td>RM</td>\n",
       "      <td>35.0</td>\n",
       "      <td>3675</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>Inside</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>2006</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>145000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1453</th>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>90.0</td>\n",
       "      <td>17217</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>Inside</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>2006</td>\n",
       "      <td>WD</td>\n",
       "      <td>Abnorml</td>\n",
       "      <td>84500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1454</th>\n",
       "      <td>20</td>\n",
       "      <td>FV</td>\n",
       "      <td>62.0</td>\n",
       "      <td>7500</td>\n",
       "      <td>Pave</td>\n",
       "      <td>Pave</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>Inside</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>2009</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>185000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1455</th>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>62.0</td>\n",
       "      <td>7917</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>Inside</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>2007</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>175000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1456</th>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>85.0</td>\n",
       "      <td>13175</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>Inside</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>MnPrv</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>210000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1457</th>\n",
       "      <td>70</td>\n",
       "      <td>RL</td>\n",
       "      <td>66.0</td>\n",
       "      <td>9042</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>Inside</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>GdPrv</td>\n",
       "      <td>Shed</td>\n",
       "      <td>2500</td>\n",
       "      <td>5</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>266500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1458</th>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>68.0</td>\n",
       "      <td>9717</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>Inside</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>142125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1459</th>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>75.0</td>\n",
       "      <td>9937</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>Inside</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>147500</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 80 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \\\n",
       "1450          90       RL         60.0     9000   Pave   NaN      Reg   \n",
       "1451          20       RL         78.0     9262   Pave   NaN      Reg   \n",
       "1452         180       RM         35.0     3675   Pave   NaN      Reg   \n",
       "1453          20       RL         90.0    17217   Pave   NaN      Reg   \n",
       "1454          20       FV         62.0     7500   Pave  Pave      Reg   \n",
       "1455          60       RL         62.0     7917   Pave   NaN      Reg   \n",
       "1456          20       RL         85.0    13175   Pave   NaN      Reg   \n",
       "1457          70       RL         66.0     9042   Pave   NaN      Reg   \n",
       "1458          20       RL         68.0     9717   Pave   NaN      Reg   \n",
       "1459          20       RL         75.0     9937   Pave   NaN      Reg   \n",
       "\n",
       "     LandContour Utilities LotConfig    ...     PoolArea PoolQC  Fence  \\\n",
       "1450         Lvl    AllPub       FR2    ...            0    NaN    NaN   \n",
       "1451         Lvl    AllPub    Inside    ...            0    NaN    NaN   \n",
       "1452         Lvl    AllPub    Inside    ...            0    NaN    NaN   \n",
       "1453         Lvl    AllPub    Inside    ...            0    NaN    NaN   \n",
       "1454         Lvl    AllPub    Inside    ...            0    NaN    NaN   \n",
       "1455         Lvl    AllPub    Inside    ...            0    NaN    NaN   \n",
       "1456         Lvl    AllPub    Inside    ...            0    NaN  MnPrv   \n",
       "1457         Lvl    AllPub    Inside    ...            0    NaN  GdPrv   \n",
       "1458         Lvl    AllPub    Inside    ...            0    NaN    NaN   \n",
       "1459         Lvl    AllPub    Inside    ...            0    NaN    NaN   \n",
       "\n",
       "     MiscFeature MiscVal MoSold  YrSold  SaleType  SaleCondition  SalePrice  \n",
       "1450         NaN       0      9    2009        WD         Normal     136000  \n",
       "1451         NaN       0      5    2009       New        Partial     287090  \n",
       "1452         NaN       0      5    2006        WD         Normal     145000  \n",
       "1453         NaN       0      7    2006        WD        Abnorml      84500  \n",
       "1454         NaN       0     10    2009        WD         Normal     185000  \n",
       "1455         NaN       0      8    2007        WD         Normal     175000  \n",
       "1456         NaN       0      2    2010        WD         Normal     210000  \n",
       "1457        Shed    2500      5    2010        WD         Normal     266500  \n",
       "1458         NaN       0      4    2010        WD         Normal     142125  \n",
       "1459         NaN       0      6    2008        WD         Normal     147500  \n",
       "\n",
       "[10 rows x 80 columns]"
      ]
     },
     "execution_count": 134,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the last 10 rows of the loaded frame.\n",
    "train.tail(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "float"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# With 'Id' dropped, positional column 5 is 'Alley'. Row 1450 is NaN there in the table\n",
    "# shown above, and the result below shows that such a missing entry is a float, not a str.\n",
    "type(train.iat[1450, 5])\n",
    "# Row 1454 holds a real value ('Pave') in the same column, for comparison:\n",
    "#type(train.iat[1454, 5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data cleaning, step 1: drop every column in which more than one third of the rows are NaN.\n",
    "# Author's notes: the final Lasso scores were 0.871 and 0.70; object-typed columns were\n",
    "# at first simply dropped as a shortcut.\n",
    "\n",
    "leng = len(train) / 3  # maximum number of missing values a column may have\n",
    "\n",
    "# Count the NaNs per column on the frame itself rather than looping over a `cols` variable\n",
    "# that is only assigned further down; this lets the cell run on a fresh kernel.\n",
    "nan_counts = train.isnull().sum()\n",
    "train = train.drop(nan_counts[nan_counts > leng].index, axis = 1)\n",
    "\n",
    "cols = train.columns\n",
    "\n",
    "# Next: try to fill the remaining missing values with the mode (the author notes that\n",
    "# earlier attempts at this kept failing).\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/lyp/.local/lib/python3.6/site-packages/ipykernel_launcher.py:16: RuntimeWarning: invalid value encountered in long_scalars\n",
      "  app.launch_new_instance()\n"
     ]
    }
   ],
   "source": [
    "# Dropping the object-typed (categorical) columns outright hurt the final score badly, so\n",
    "# some of them clearly carry a lot of information about SalePrice. Instead, rank the\n",
    "# categories of each such column by their mean SalePrice and store that rank\n",
    "# (1 = lowest mean price) in place of the string.\n",
    "cols = train.columns\n",
    "for col in cols:\n",
    "    values = train[col]\n",
    "    # Encode only columns that really hold strings. Looking at every row, not just row 0,\n",
    "    # keeps a categorical column from being skipped when its first entry is NaN.\n",
    "    if not values.map(lambda v: isinstance(v, str)).any():\n",
    "        continue\n",
    "\n",
    "    # Mean SalePrice per category. groupby leaves NaN keys out, so there is no empty\n",
    "    # group and no 0/0 division (the source of the old RuntimeWarning).\n",
    "    prices = train.groupby(col, sort=False)['SalePrice']\n",
    "    mean_price = prices.sum() / prices.size()\n",
    "\n",
    "    # rank = 1 + number of categories with a strictly lower mean; ties share the lowest rank.\n",
    "    rank = mean_price.rank(method='min').astype(int)\n",
    "\n",
    "    # NOTE(review): missing values are encoded as 1, exactly as this cell always did, so they\n",
    "    # cannot be told apart from the cheapest category and are not left for a later imputer.\n",
    "    # The column is also kept object-typed, as before, so downstream cells see the same frame.\n",
    "    train[col] = values.map(rank).fillna(1).astype(int).astype(object)\n",
    "\n",
    "# This cell has always left `train2` behind as a copy of the encoded frame; keep doing so.\n",
    "train2 = train.copy()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>MSZoning</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>Street</th>\n",
       "      <th>LotShape</th>\n",
       "      <th>LandContour</th>\n",
       "      <th>Utilities</th>\n",
       "      <th>LotConfig</th>\n",
       "      <th>LandSlope</th>\n",
       "      <th>...</th>\n",
       "      <th>EnclosedPorch</th>\n",
       "      <th>3SsnPorch</th>\n",
       "      <th>ScreenPorch</th>\n",
       "      <th>PoolArea</th>\n",
       "      <th>MiscVal</th>\n",
       "      <th>MoSold</th>\n",
       "      <th>YrSold</th>\n",
       "      <th>SaleType</th>\n",
       "      <th>SaleCondition</th>\n",
       "      <th>SalePrice</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1455</th>\n",
       "      <td>60</td>\n",
       "      <td>4</td>\n",
       "      <td>62.0</td>\n",
       "      <td>7917</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>2007</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>175000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1456</th>\n",
       "      <td>20</td>\n",
       "      <td>4</td>\n",
       "      <td>85.0</td>\n",
       "      <td>13175</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2010</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>210000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1457</th>\n",
       "      <td>70</td>\n",
       "      <td>4</td>\n",
       "      <td>66.0</td>\n",
       "      <td>9042</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2500</td>\n",
       "      <td>5</td>\n",
       "      <td>2010</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>266500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1458</th>\n",
       "      <td>20</td>\n",
       "      <td>4</td>\n",
       "      <td>68.0</td>\n",
       "      <td>9717</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>112</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>2010</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>142125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1459</th>\n",
       "      <td>20</td>\n",
       "      <td>4</td>\n",
       "      <td>75.0</td>\n",
       "      <td>9937</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>2008</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>147500</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 75 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      MSSubClass MSZoning  LotFrontage  LotArea Street LotShape LandContour  \\\n",
       "1455          60        4         62.0     7917      2        1           2   \n",
       "1456          20        4         85.0    13175      2        1           2   \n",
       "1457          70        4         66.0     9042      2        1           2   \n",
       "1458          20        4         68.0     9717      2        1           2   \n",
       "1459          20        4         75.0     9937      2        1           2   \n",
       "\n",
       "     Utilities LotConfig LandSlope    ...    EnclosedPorch 3SsnPorch  \\\n",
       "1455         2         1         1    ...                0         0   \n",
       "1456         2         1         1    ...                0         0   \n",
       "1457         2         1         1    ...                0         0   \n",
       "1458         2         1         1    ...              112         0   \n",
       "1459         2         1         1    ...                0         0   \n",
       "\n",
       "     ScreenPorch PoolArea MiscVal  MoSold  YrSold  SaleType  SaleCondition  \\\n",
       "1455           0        0       0       8    2007         5              5   \n",
       "1456           0        0       0       2    2010         5              5   \n",
       "1457           0        0    2500       5    2010         5              5   \n",
       "1458           0        0       0       4    2010         5              5   \n",
       "1459           0        0       0       6    2008         5              5   \n",
       "\n",
       "     SalePrice  \n",
       "1455    175000  \n",
       "1456    210000  \n",
       "1457    266500  \n",
       "1458    142125  \n",
       "1459    147500  \n",
       "\n",
       "[5 rows x 75 columns]"
      ]
     },
     "execution_count": 137,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the last rows of `train`; the former string columns now display integer ranks.\n",
    "train.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>MSZoning</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>Street</th>\n",
       "      <th>LotShape</th>\n",
       "      <th>LandContour</th>\n",
       "      <th>Utilities</th>\n",
       "      <th>LotConfig</th>\n",
       "      <th>LandSlope</th>\n",
       "      <th>...</th>\n",
       "      <th>EnclosedPorch</th>\n",
       "      <th>3SsnPorch</th>\n",
       "      <th>ScreenPorch</th>\n",
       "      <th>PoolArea</th>\n",
       "      <th>MiscVal</th>\n",
       "      <th>MoSold</th>\n",
       "      <th>YrSold</th>\n",
       "      <th>SaleType</th>\n",
       "      <th>SaleCondition</th>\n",
       "      <th>SalePrice</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1455</th>\n",
       "      <td>60</td>\n",
       "      <td>4</td>\n",
       "      <td>62.0</td>\n",
       "      <td>7917</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>2007</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>175000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1456</th>\n",
       "      <td>20</td>\n",
       "      <td>4</td>\n",
       "      <td>85.0</td>\n",
       "      <td>13175</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2010</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>210000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1457</th>\n",
       "      <td>70</td>\n",
       "      <td>4</td>\n",
       "      <td>66.0</td>\n",
       "      <td>9042</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2500</td>\n",
       "      <td>5</td>\n",
       "      <td>2010</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>266500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1458</th>\n",
       "      <td>20</td>\n",
       "      <td>4</td>\n",
       "      <td>68.0</td>\n",
       "      <td>9717</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>112</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>2010</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>142125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1459</th>\n",
       "      <td>20</td>\n",
       "      <td>4</td>\n",
       "      <td>75.0</td>\n",
       "      <td>9937</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>2008</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>147500</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 75 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      MSSubClass MSZoning  LotFrontage  LotArea Street LotShape LandContour  \\\n",
       "1455          60        4         62.0     7917      2        1           2   \n",
       "1456          20        4         85.0    13175      2        1           2   \n",
       "1457          70        4         66.0     9042      2        1           2   \n",
       "1458          20        4         68.0     9717      2        1           2   \n",
       "1459          20        4         75.0     9937      2        1           2   \n",
       "\n",
       "     Utilities LotConfig LandSlope    ...    EnclosedPorch 3SsnPorch  \\\n",
       "1455         2         1         1    ...                0         0   \n",
       "1456         2         1         1    ...                0         0   \n",
       "1457         2         1         1    ...                0         0   \n",
       "1458         2         1         1    ...              112         0   \n",
       "1459         2         1         1    ...                0         0   \n",
       "\n",
       "     ScreenPorch PoolArea MiscVal  MoSold  YrSold  SaleType  SaleCondition  \\\n",
       "1455           0        0       0       8    2007         5              5   \n",
       "1456           0        0       0       2    2010         5              5   \n",
       "1457           0        0    2500       5    2010         5              5   \n",
       "1458           0        0       0       4    2010         5              5   \n",
       "1459           0        0       0       6    2008         5              5   \n",
       "\n",
       "     SalePrice  \n",
       "1455    175000  \n",
       "1456    210000  \n",
       "1457    266500  \n",
       "1458    142125  \n",
       "1459    147500  \n",
       "\n",
       "[5 rows x 75 columns]"
      ]
     },
     "execution_count": 138,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build a most-frequent-value imputer for NaN entries and fit it on `train`.\n",
    "# NOTE(review): `Imputer` was deprecated in scikit-learn 0.20 and removed in 0.22 in favour of\n",
    "# `sklearn.impute.SimpleImputer` - confirm which version this notebook is pinned to.\n",
    "from sklearn.preprocessing import Imputer\n",
    "imp = Imputer(missing_values=np.nan, strategy='most_frequent', axis=0)\n",
    "# Only fit() is called in this cell; `train` is not reassigned here, so the tail displayed\n",
    "# below is the frame as it was before imputation.\n",
    "imp.fit(train)\n",
    "train.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZYAAAENCAYAAAAsWUMWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3XmcXGWZ6PHfU1W97+nuJJ2FdEI6hERZW4KIoKAS3IKjXIOOA4qXcYQ74zYXmFFnZGRG5s41M3MHF0ZQRCEgLgRF4wIoICYESALZSCfpJJ2tu5Pe0ntXPfeP83ZSVGpLp9K1Pd/Ppz+pOuc97/vUqUo9dc55z/uKqmKMMcakii/dARhjjMktlliMMcaklCUWY4wxKWWJxRhjTEpZYjHGGJNSlliMMcaklCUWY4wxKWWJxRhjTEpZYjHGGJNSgXQHkA51dXXa2NiY7jCMMSarvPjii52qWp+oXFKJRUSWAv8B+IHvqOrXItYXAd8HLgQOAx9W1Va37nbgRiAI/LWqro5Xp4jMBVYCU4CXgI+p6kisNkSkAPgOcIF7Pd9X1X+J93oaGxtZt25dMi/dGGOMIyK7kymX8FSYiPiBu4GrgUXAdSKyKKLYjUCXqs4HVgB3uW0XAcuBxcBS4Bsi4k9Q513AClVtArpc3THbAK4FilT1jXhJ5y9FpDGZF2+MMSb1krnGchHQoqo7VXUE72hiWUSZZcD97vGjwJUiIm75SlUdVtVdQIurL2qdbpsrXB24Oq9J0IYCZSISAEqAEaA36T1gjDEmpZJJLDOBvWHP29yyqGVUdQzoAWrjbBtreS3Q7eqIbCtWG48C/cABYA/wb6p6JInXZYwx5jRIJrFIlGWRY+3HKpOq5fHauAjv+s0MYC7weRGZF1lQRG4SkXUisq6joyNKVcYYY1IhmcTSBswOez4L2B+rjDslVQUcibNtrOWdQLWrI7KtWG18BPiVqo6qajvwHNAc+SJU9R5VbVbV5vr6hJ0ajDHGTFAyieUFoElE5opIId7F+FURZVYB17vHHwKeVG8GsVXAchEpcr29moC1sep02zzl6sDV+ViCNvYAV4inDLgY2Jr8LjDGGJNKCbsbq+qYiNwCrMbrGnyfqm4SkTuAdaq6CrgXeEBEWvCOIpa7bTeJyCPAZmAMuFlVgwDR6nRN3gqsFJGvAi+7uonVBl7vsu8Cr+KdLvuuqm6c8B4xxhhzSiQfpyZubm5Wu4/FGGNOjoi8qKonXGqIZEO6GGOMSam8HNLFnOjBNXsSlvnIkjMmIRJjTLazIxZjjDEpZYnFGGNMSlliMcYYk1KWWIwxxqSUJRZjjDEpZYnFGGNMSlliMcYYk1KWWIwxxqSU3SBpjnllXw8DI2MEfMKsmlKmVRanOyRjTBayxGIAONAzyENrj999X1kc4G+vWojfF20aHGOMic1OhRkAXt3XiwB/c2UT1144i96hMbYetBmejTEnzxKLAWDT/h4a68qYVlnMubOrqSopYO0um+HZGHPyLLEYWtqP0t43zOIZlQD4RHhTYw3b249y+OhwmqMzxmQbSyyG1ZsOArB4RtWxZc1zpuATeKHVjlqMMSfHEovhl68eYHZNCVUlBceWVZYUcHZDJet2dzEWDKUxOmNMtrHEkuf2Hhng1X29vGFm1QnrmudMYWAkyI6O/jREZozJVkklFhFZKiLbRKRFRG6Lsr5IRB5269eISGPYutvd8m0iclWiOkVkrqtju6uzMF4bIvJREVkf9hcSkfMmukPyzW82HwJgUUPlCevm1ZcR8Akt7X2THZYxJoslTCwi4gfuBq4GFgHXiciiiGI3Al2qOh9YAdzltl0ELAcWA0uBb4iIP0GddwErVLUJ6HJ1x2xDVX+oquep6nnAx4BWVV1/8rsiP20+0Et9RRG15UUnrCvw+2isK6Ol42gaIjPGZKtkjlguAlpUdaeqjgArgWURZZYB
97vHjwJXioi45StVdVhVdwEtrr6odbptrnB14Oq8JkEb4a4DHkriNRln+6E+Fkwrj7l+fn05h3qH6R0cncSojDHZLJnEMhPYG/a8zS2LWkZVx4AeoDbOtrGW1wLdro7ItmK1Ee7DWGJJmqqyvf0oTVMrYpaZP9VLOnbUYoxJVjKJJdqYHppkmVQtTxiHiCwBBlT11SjlEJGbRGSdiKzr6OiIViTv7OseZGAkSFOcI5bpVcWUFfppabfEYoxJTjKJpQ2YHfZ8FrA/VhkRCQBVwJE428Za3glUuzoi24rVxrjlxDlaUdV7VLVZVZvr6+vjvNz8sd0li3hHLD4Rzpxazo72o6hG/p4wxpgTJZNYXgCaXG+tQrwv8FURZVYB17vHHwKeVO9baBWw3PXomgs0AWtj1em2ecrVgavzsQRtICI+4Fq8azUmSdsPeb29mqbGPmIZX983PMa2Q9Y7zBiTWMLRjVV1TERuAVYDfuA+Vd0kIncA61R1FXAv8ICItOAdRSx3224SkUeAzcAYcLOqBgGi1emavBVYKSJfBV52dROrDecyoE1Vd050R+Sj7YeOUldeRE1ZYdxy890RzbPbO1k4/cRuycYYEy6pYfNV9QngiYhlXw57PIR3xBBt2zuBO5Op0y3fiddrLHJ5vDaeBi6O9xrMiV5rPxq3R9i4qpIC6suLeGZ7J59867xJiMwYk83szvs8paq0HOpjwbTY11fCzZ9azppdhxkaDZ7myIwx2c4SS57a3zNE/0jwWHfiROZPLWdoNMRLu7tOc2TGmGxniSVPjV+4T/aIZV6dN7zLMy2dpzMsY0wOsMSSp7YfGu9qnNwRS1GBn/PPqObZ7ZZYjDHxWWLJU9vb+5LqERbu0vn1vLq/h67+kdMYmTEm2yXVK8xktwfX7Dlh2fM7DlNZHIi6Lpa3LqhjxW9f47kdnbz3nBmpDNEYk0PsiCVPdR4doa7ixBGN4zlnZhUVxQE7HWaMicsSSx4aGg0yOBpkSmnyp8EAAn4fl5xZyzPbO214F2NMTJZY8lD3gDcEfnVpQYKSJ7psQT37ugdtUEpjTEyWWPJQ14B38b3mJI9YAK5YOBWA321tT2lMxpjcYYklDx1LLCfRI2xcQ1UJixoq+d2WQ6kOyxiTIyyx5KHugVEK/EJZoX9C27/j7Km8uLvLuh0bY6KyxJKHugZGqC4p5MSZnZNzxdnTCCn8/jWbMM0YcyJLLHmoe2CUmrKTv3A/7pyZVdSVF9l1FmNMVJZY8lDXwAjVE7hwP87nE65YWM/T29oZDYZSGJkxJhfYnfd5ZngsyMBIkJqSkz9iCb9Lv8Dvo29ojLt+uZV59cfHG/vIkjNSEqcxJnvZEUueOXYPywR6hIWbX1+O3ydsPWjTFRtjXs8SS545lXtYwhUV+JlXV8bWg72pCMsYk0OSSiwislREtolIi4jcFmV9kYg87NavEZHGsHW3u+XbROSqRHWKyFxXx3ZXZ2ESbZwjIs+LyCYReUVEiieyM/JBlztiqZnAXfeRFk6voPPoCJ19w6dclzEmdyRMLCLiB+4GrgYWAdeJyKKIYjcCXao6H1gB3OW2XQQsBxYDS4FviIg/QZ13AStUtQnocnXHayMA/AD4lKouBt4GjJ7kfsgb3QMjBHxCedGpX147a3olAFsP2ekwY8xxyRyxXAS0qOpOVR0BVgLLIsosA+53jx8FrhTvJollwEpVHVbVXUCLqy9qnW6bK1wduDqvSdDGu4CNqroBQFUPq6pNzB5D18Ao1aUFE76HJdyUskKmVhTZ6TBjzOskk1hmAnvDnre5ZVHLqOoY0APUxtk21vJaoNvVEdlWrDYWACoiq0XkJRH539FehIjcJCLrRGRdR0f+3tjXPTByytdXwp3dUElrZz9Do5bLjTGeZBJLtJ+2kWOmxyqTquXx2ggAlwIfdf9+QESuPKGg6j2q2qyqzfX19VGqyg9d/ad2D0ukhdMrCCm8ZqfDjDFOMomlDZgd9nwW
sD9WGXfNowo4EmfbWMs7gWpXR2Rb8dr4vap2quoA8ARwQRKvK++MjIXoHwmm5ML9uNlTSikt9Fu3Y2PMMckklheAJtdbqxDvYvyqiDKrgOvd4w8BT6o3E9QqYLnr0TUXaALWxqrTbfOUqwNX52MJ2lgNnCMipS7hXA5sTn4X5I9u19U4lUcsPhGappbT0n7UJv8yxgBJ3HmvqmMicgveF7gfuE9VN4nIHcA6VV0F3As8ICIteEcRy922m0TkEbwv+jHg5vEL69HqdE3eCqwUka8CL7u6idNGl4h8HS9ZKfCEqv7ilPZKjkplV+NwZ9aXs6Gth3brdmyMIckhXVT1CbxTTOHLvhz2eAi4Nsa2dwJ3JlOnW74Tr9dY5PJ4bfwAr8uxiaNn0EssVRMYziWeM92QLjs7bFZJY4zdeZ9XegZHEKCiOLWJpaaskJrSAnZ09Ke0XmNMdrLEkkd6BkepLCnA7zv1e1gizasvZ1dnP8GQXWcxJt9ZYskj3YOjKT8NNu7M+jIGR4NsOWA3SxqT7yyx5JGegdOXWObVeddZnt9x+LTUb4zJHpZY8oSq0nMaj1gqSwqoLy/ijzs6T0v9xpjsYYklTwyMBBkL6WlLLADz6stYu+uIzSppTJ6zxJInTldX43Dz6svpHwmyab9dZzEmn1liyROTkVhm15QAsGFv92lrwxiT+Syx5IljiSXFd92HqyopoL6iiA1tlliMyWeWWPJEz+AofknNBF+xiAjnzqqyIxZj8pwlljzh3RwZwJeCCb7iOXdWNTs7++kdskk8jclXlljyRPdpvIcl3Dmzq1GFV9t6TntbxpjMZIklT/QMjkxOYplZBcAGSyzG5C1LLHkgpErv4BhVJambhyWWmrJC5tSW2nUWY/KYJZY80D88RlD1tPYIC3fOrGo2Ws8wY/KWJZY8MN7VuHoSToUBnDuriv09Q7T3DU1Ke8aYzGKJJQ90D5z+myPDnTu7GoCNe+06izH5KKnEIiJLRWSbiLSIyG1R1heJyMNu/RoRaQxbd7tbvk1ErkpUp4jMdXVsd3UWxmtDRBpFZFBE1ru/b010Z+SqybjrPtziGZX4fWI3ShqTpxImFhHxA3cDVwOLgOtEZFFEsRuBLlWdD6wA7nLbLsKbm34xsBT4hoj4E9R5F7BCVZuALld3zDacHap6nvv71EntgTzQMzhKwCeUFvonpb3SwgBn1pfZmGHG5KlkjlguAlpUdaeqjgArgWURZZYB97vHjwJXioi45StVdVhVdwEtrr6odbptrnB14Oq8JkEbJoHx4fInc3ctaqi0Sb+MyVPJjO8xE9gb9rwNWBKrjKqOiUgPUOuW/yli25nucbQ6a4FuVR2LUj5WGwBzReRloBf4oqo+k8Tryhs9g6OT1iPswTV7ABgaDXGgZ4jv/GEnpRHDyHxkyRmTEosxJj2SOWKJ9jM3cmLzWGVStTxeGweAM1T1fOBzwIMiUhlZUERuEpF1IrKuo6MjSlW5q2dwlKriyUks4xqqigE40Gs9w4zJN8kkljZgdtjzWcD+WGVEJABUAUfibBtreSdQ7eqIbCtqG+4022EAVX0R2AEsiHwRqnqPqjaranN9fX0SLzs3jAVD9E7iEcu46eOJpccSizH5JpnE8gLQ5HprFeJdjF8VUWYVcL17/CHgSVVVt3y569E1F2gC1saq023zlKsDV+dj8doQkXrXGQARmefa2Jn8LshtHUeHUSavR9i4iuICKooCHOwZnNR2jTHpl/Aai7uecQuwGvAD96nqJhG5A1inqquAe4EHRKQF70hludt2k4g8AmwGxoCbVTUIEK1O1+StwEoR+SrwsqubWG0AlwF3iMgYEAQ+papHJr5Lcsv+bu+IYbJujgw3varYjliMyUNJTc6hqk8AT0Qs+3LY4yHg2hjb3gncmUydbvlOvF5jkcujtqGqPwZ+nPBF5KkD7ohhMsYJi9RQVcxzOw4zFgoR8Nm9uMbkC/vfnuMOuCOWyT4VBjC9
qoRgSOnoG570to0x6WOJJcft7xmkMOCjuGDy3+rxnmEH7XSYMXnFEkuOO9A9NOk3R46rKy8i4BO7zmJMnrHEkuMO9Aym5cI9gN8nTKsstiMWY/KMJZYct79nKC3XV8ZNrypmf88gXk9yY0w+sMSSw0bGQnQeHU5rYmmoKmZgJEjf0FjiwsaYnGCJJYcd6h1CNT09wsY1VJUAdge+MfnEEksO29/t7mGZ5OFcwk2vHO8ZZnfgG5MvLLHksPGjhHQesZQU+qkuLbDBKI3JI5ZYcth+d5RQnYa77sM1VBYfu1HTGJP7LLHksPF7WAoD6X2bp1eV0Hl0mNFgKK1xGGMmhyWWHHagZ/DY3e/p1FBVjOJ1JjDG5D5LLDlsf/dQxiQWsJ5hxuQLSyw57GDvEA3VJekOg5qyQgoDvmMjLRtjcpsllhw1NBrkSP8IMzLgiMUnwvRKm5vFmHxhiSVH7XP3sMysSf8RC3inww72DNnQLsbkAUssOaqty0sss2pK0xyJp6GqhOGxEF0Do+kOxRhzmlliyVFtXQMAzMqgIxbArrMYkweSSiwislREtolIi4jcFmV9kYg87NavEZHGsHW3u+XbROSqRHWKyFxXx3ZXZ2GiNtz6M0TkqIh84WR3Qi5q6xqkwC9MrUj/NRbwRjn2yfFTdMaY3JUwsYiIH7gbuBpYBFwnIosiit0IdKnqfGAFcJfbdhGwHFgMLAW+ISL+BHXeBaxQ1Sagy9Uds40wK4BfJvvCc11b1yAzq0vw+yZ/gq9oCvw+plcWHztFZ4zJXckcsVwEtKjqTlUdAVYCyyLKLAPud48fBa4Ub8rCZcBKVR1W1V1Ai6svap1umytcHbg6r0nQBiJyDbAT2JT8S89tbV0DGXN9ZdysmlLaugYIhewCvjG5LJnEMhPYG/a8zS2LWkZVx4AeoDbOtrGW1wLdro7ItqK2ISJlwK3AV+K9CBG5SUTWici6jo6OBC85+7V1DWbM9ZVxs2pKGBoNsetwf7pDMcacRskklmjnUiJ/csYqk6rl8dr4Ct6ps6NR1h8vqHqPqjaranN9fX28ollvaDRIR99w5iWWKd4R1Ia93WmOxBhzOiWTWNqA2WHPZwH7Y5URkQBQBRyJs22s5Z1Atasjsq1YbSwB/lVEWoHPAH8nIrck8bpyVqZ1NR43taKIwoDPEosxOS6ZxPIC0OR6axXiXYxfFVFmFXC9e/wh4En17oRbBSx3PbrmAk3A2lh1um2ecnXg6nwsXhuq+lZVbVTVRuDfgX9W1f86iX2QczKtq/E4nwgzq0tY39aT7lCMMadRIFEBVR1zRwCrAT9wn6puEpE7gHWqugq4F3hARFrwjiKWu203icgjwGZgDLhZVYMA0ep0Td4KrBSRrwIvu7qJ1YY5UaYesQDMrinhTzuPMDwWpCjgT3c4xpjTIGFiAVDVJ4AnIpZ9OezxEHBtjG3vBO5Mpk63fCder7HI5THbCCvzj/HW54vj97AUpTuUE8yqKWUk2MnWA32cO7s63eEYY04Du/M+B7V1DTCzugRfhtzDEm789NyGNrvOYkyussSSg7yuxpl3GgygqqSA+ooi1tsFfGNyliWWHJSJ97CMExEuOKOaF1qPpDsUY8xpYoklxwyNBuk8mnn3sIR7y/w69h4ZZLfdKGlMTrLEkmPGe4TNnpKZp8IALp1fB8Az2zvTHIkx5nSwxJJjMvUelnBz68qYWV3Cs5ZYjMlJllhyTCbfwzJORLh0fh1/3NHJWDCU7nCMMSlmiSXH7DkyQFHAR3155t3DEu7Spjp6h8bYuM/uwjcm11hiyTGtnf3MqS3NyHtYwr1lfh0i2OkwY3KQJZYc03q4nzm1ZekOI6EpZYUsnlFpicWYHGSJJYeEQsruwwM01mbu9ZVwl86v56U9XRwdHktc2BiTNSyx5JBDfUMMj4VorMv8IxaAt59Vz1hI+d2WQ+kOxRiTQpZYcsiuTu+Gw8YsOBUG8KbGKTRUFfPY+sjpfYwx
2cwSSw7Zfdi7hyVbjlh8PuH9583g9691cPjocLrDMcakiCWWHNLa2U9hwEdDZXG6Q0naB86fSTCk/OKVA+kOxRiTIpZYckjr4X7OmJL5XY3DLZxeycLpFfz05X3pDsUYkyKWWHKI1yMsO06Dhbvm/Jm8vKfbBqU0JkcklVhEZKmIbBORFhG5Lcr6IhF52K1fIyKNYetud8u3ichVieoUkbmuju2uzsJ4bYjIRSKy3v1tEJEPTHRnZLNQSGk93J81XY3Dvf/cGYjAT16yoxZjckHCxCIifuBu4GpgEXCdiCyKKHYj0KWq84EVwF1u20V4c9MvBpYC3xARf4I67wJWqGoT0OXqjtkG8CrQrKrnuTa+LSJJTbmcS9r7hhkazZ6uxuFmVJdwWVM9P1yzh+GxYLrDMcacomSOWC4CWlR1p6qOACuBZRFllgH3u8ePAleKiLjlK1V1WFV3AS2uvqh1um2ucHXg6rwmXhuqOqCq43fYFQOa7IvPJa2Hs6urcaRPvnUunUeHeXyDXcQ3Jtslk1hmAnvDnre5ZVHLuC/5HqA2zraxltcC3WGJIrytWG0gIktEZBPwCvCpsO3zRqu7h2VOFp4KA2+OlrOmVXDvs7tQzcvfBsbkjGQSS7QuRpH/82OVSdXyuHGo6hpVXQy8CbhdRE7obysiN4nIOhFZ19HREaWq7NZ6eIBCv48Z1Zk7D0s8IsInLm1ky4Fent9xON3hGGNOQTLXItqA2WHPZwGRt0qPl2lz1zeqgCMJto22vBOoFpGAO+oILx+rjWNUdYuI9ANvANZFrLsHuAegubk5534St3b2M3tKCf4s6Gr84Jo9UZePBkOUFfr5yuObWf3ZyyY5KmNMqiRzxPIC0OR6axXiXYxfFVFmFXC9e/wh4En1zmesApa7Hl1zgSZgbaw63TZPuTpwdT4Wrw1XRwBAROYAZwGtSe+BHOH1CMvO6yvjCvw+Lp5Xy7ZDfbS096U7HGPMBCVMLO7I4RZgNbAFeERVN4nIHSLyflfsXqBWRFqAzwG3uW03AY8Am4FfATerajBWna6uW4HPubpqXd0x2wAuBTaIyHrgp8CnVTWvxmIPhpRdnf3Mq8/uxAKwZF4tBX7hnj/sTHcoxpgJSqpbrqo+ATwRsezLYY+HgGtjbHsncGcydbrlO/F6jUUuj9qGqj4APJDwReSwfV2DDI+FmD+1PN2hnLLyogAXzqnhZy/v5/PvOotpWTQ8jTHGk3f3e+SaB9fsYevBXgB2dvTHvH6RTS6dX8/aXUf47nOt3Hb1wnSHY4w5STakSw7o6PNGBp5akRu/7qeUFXL1Gxv44Z920zc0mu5wjDEnyRJLDmjvG6a8KEBJoT/doaTMX142j77hMR5am/1HYMbkG0ssOaCjb5j6iqJ0h5FS58yq5pIza7nv2VZGxkLpDscYcxLsGkuWU1Xa+4Y4d1Z1ukNJqQfX7GHBtAr+uOMwf/eTV7hgTs0JZT6y5Iw0RGaMScSOWLLc0eExhkZDOXfEAtA0tZzplcX8YXsHIRvmxZisYYkly7Xn2IX7cCLCW5vqaO8b5rVDdsOkMdnCEkuWG+8RlotHLOBda6kqKeAPr+XVPa/GZDVLLFmuvW+YooCPyuLcvFzm9wlvObOW1sP97OsaTHc4xpgkWGLJcu19Q9RXFOFNZZObmhunUBTw8WxL7o1KbUwussSS5Tr6hpmao6fBxhUX+HlT4xRe2ddD98BIusMxxiRgiSWL9Q6N0jc0Rn0OXriP9OYzawF4fqfN1WJMprPEksVa2o8C5PwRC0BNaSGLZ1TxQusRhkeD6Q7HGBOHJZYs9tpBrwtuvowAfOn8OoZGQ6zb3ZXuUIwxcVhiyWJbD/ZRGPBRXVqQ7lAmxewppcyZUsofd3QSDNkNk8ZkKkssWWzrwV6mVRThy+EeYZEubaqja2CUzQd60x2KMSYGSyxZSlXZerCP6VUl6Q5lUp3dUMmUskKe3W5dj43JVJZYslR73zDdA6NM
r8z9C/fhfOLdMLm3a5AX7VqLMRkpqcQiIktFZJuItIjIbVHWF4nIw279GhFpDFt3u1u+TUSuSlSniMx1dWx3dRbGa0NE3ikiL4rIK+7fKya6M7LJFncqKN+OWAAumFNDSYGfu59qSXcoxpgoEiYWEfEDdwNXA4uA60RkUUSxG4EuVZ0PrADuctsuApYDi4GlwDdExJ+gzruAFaraBHS5umO2AXQC71PVNwLXAw+c3C7ITttcj7DpedIjLFxRwM9bm+p4cms7L+2xoxZjMk0yRywXAS2qulNVR4CVwLKIMsuA+93jR4ErxRtjZBmwUlWHVXUX0OLqi1qn2+YKVweuzmvitaGqL6vqfrd8E1AsIjl/fmjrwT4aqopzatbIk/HmM2uZUlbIit+8lu5QjDERkkksM4G9Yc/b3LKoZVR1DOgBauNsG2t5LdDt6ohsK1Yb4T4IvKyqw0m8rqy25UAvZ02vSHcYaVMU8POpy+fxzPZO1tjd+MZklGQSS7S+rJE3EcQqk6rlCeMQkcV4p8f+Mko5ROQmEVknIus6OrK7R9FoMMSOjqMsnF6Z7lDS6mMXN1JfUcT//fVrqE0EZkzGSCaxtAGzw57PAvbHKiMiAaAKOBJn21jLO4FqV0dkW7HaQERmAT8F/kJVd0R7Eap6j6o2q2pzfX19Ei87c+3s6Gc0qCzM4yMWgJJCP399xXzWth5h9aZD6Q7HGOMkk1heAJpcb61CvIvxqyLKrMK7cA7wIeBJ9X5CrgKWux5dc4EmYG2sOt02T7k6cHU+Fq8NEakGfgHcrqrPncyLz1ZbD3o9whY25HdiAbjuojNYMK2cf35iC0M2hpgxGSFhYnHXM24BVgNbgEdUdZOI3CEi73fF7gVqRaQF+Bxwm9t2E/AIsBn4FXCzqgZj1enquhX4nKur1tUdsw1Xz3zgSyKy3v1NneD+yApbD/ZR4Bfm1ZWnO5S0C/h9fOm9i9hzZID7ntuV7nCMMYDk47np5uZmXbduXbrDmLDr71vLod4hfvWZy3hwzZ50h5M2H1lyxrHHn7x/Hc/v6OSpL7yNqXnYBduYySAiL6pqc6Jydud9llFVXt3XwxtnVqU7lIzy9+85m5FgiP+zelu6QzEm71liyTIHeoY43D8M7HhpAAAc0ElEQVTCG2dZYgk3t66Mj79lLj96sY2Nbd3pDseYvGaJJctsbOsBsCOWKG65Yj515YV85fHN1v3YmDSyxJJlXt3Xg98nnN2Q3/ewRFNZXMAX3nUWL+7uYtWGyB7xxpjJEkhcxGSSjft6WDCtguKC/BzKJVy0jgshVWZUFfPlxzbR1T/KDW9pnPzAjMlzdsSSRY5fuLejlVh8IrznnBn0DI7yB5uzxZi0sMSSRfZ1D3Kkf8SuryQwt66MN86s4pntHezrHkx3OMbkHUssWeTVfe7C/azqNEeS+Za+YTqq8LVfbk13KMbkHUssWeSVfT0EfJL3Y4Qlo6a0kLc21fP4hv280Hok3eEYk1cssWSRjW124f5kXL6gnumVxdzx+GZCIet+bMxkscSSJeyO+5NXGPBx29ULeWVfD4++1JbucIzJG5ZYssTuwwN0DYxyzmxLLCdj2XkzOP+Mav71V9voGxpNdzjG5AVLLFlirbtOcFHjlDRHkl1EhH9432I6jw5z91NRp+oxxqSYJZYssXbXEWpKC5g/1YbKP1nnza7mzy6YyX3P7mL34f50h2NMzrPEkiVeaD1Cc+MURKLN0GwSuXXpQgJ+4c5fbEl3KMbkPEssWaC9d4jdhwdYMtdOg03UtMpibn77fH69+RDPtXSmOxxjcpqNFZYFxq+vvMmur5y08PHEyosC1JQW8NmH13Pz2+dT4Pd+V4VPGGaMOXV2xJIF1u46Qmmhn8UzbIywU1Hg97HsvJm09w3zuI1+bMxpk1RiEZGlIrJNRFpE5LYo64tE5GG3fo2INIatu90t3yYiVyWqU0Tmujq2uzoL47UhIrUi8pSIHBWR/5rojshk
a3cd4YIzagj47XfAqVowrYLLF9SzbncXL+3pSnc4xuSkhKfCRMQP3A28E2gDXhCRVaq6OazYjUCXqs4XkeXAXcCHRWQRsBxYDMwAfisiC9w2seq8C1ihqitF5Fuu7m/GagMYAr4EvMH95YwH1+xhcCTItoN9zDy7JK/nt0+ld5w9jT1HBnhs/T5qSgvTHY4xOSeZn8AXAS2qulNVR4CVwLKIMsuA+93jR4Erxeu+tAxYqarDqroLaHH1Ra3TbXOFqwNX5zXx2lDVflV9Fi/B5Jzdh/tRYG5tWbpDyRl+n7D8TbOpLC7g3md38sDzrTbjpDEplMzF+5nA3rDnbcCSWGVUdUxEeoBat/xPEdvOdI+j1VkLdKvqWJTysdpIqouPiNwE3ARwxhnZc7F2e8dRAj5hVk1pukPJKRXFBXz6bfP50Yt7+dJjm/jBn/Zw8bxamqaV44vSpdsu8BuTvGQSS7QbJyJ/3sUqE2t5tCOleOWTjSMmVb0HuAegubk5K36eqipbDvQyf2o5hQG7vpJqJYV+/vziOTzzWgfPtnRy//N9VBYHOLuhkoXTK5k/tRy/z+4bMuZkJZNY2oDZYc9nAZFdasbLtIlIAKgCjiTYNtryTqBaRALuqCW8fKw2ctbB3iG6B0Z5+1lT0x1KzvKJcPlZU3lLUx1bDvSxfo93UX/NriNUlxbw9gVTOX+OzX9jzMlI5mfwC0CT661ViHcxflVEmVXA9e7xh4An1TtpvQpY7np0zQWagLWx6nTbPOXqwNX5WII2ctbmA70I2PwrkyDg8/HGmVV87M2NfPE9i/jzJWdQXhTgp+v38Z+/a6Gl/Wi6QzQmayRMLO7I4RZgNbAFeERVN4nIHSLyflfsXqBWRFqAzwG3uW03AY8Am4FfATerajBWna6uW4HPubpqXd0x2wAQkVbg68ANItLmeqNlvS0Hepk9pZSK4oJ0h5JXCvw+Fs2o4q8uP5O/ePMcBkeDfOAbz/H0tvZ0h2ZMVpAc/9EfVXNzs65bty7dYcS1v3uQS772JFctns7lC+rTHU5e6xoY4ecbD7DtYC9f/x/ncc35MxNvZEwOEpEXVbU5UTm7IpyhfrvlEABnN9hpsHSrKS3k0U+9mTc1TuHzP9rAk1sPpTskYzKaJZYM9etNh6grL2JqRXG6QzFAWVGA71zfzNkNFfzVD15i7a6c7jdizCmxxJKB9hwe4LkdnZwzy2aLzCQVxQV87+MXMbO6hBvvf4HN+3vTHZIxGclGN85A33++Fb+IzRaZQcKH0/nQhbP49h928j++/Tx/edk8asuLALuJ0phxdsSSYQZGxnhk3V6WvmE6lSXWGywTVZcW8vFLGgmpct9zu+gdHE13SMZkFEssGeanL++jd2iMGy5pTHcoJo6plcXccEkj/SNB7ntuFwMjY4k3MiZPWGLJIKrK/X9sZVFDJRfOqUl3OCaBWTWlfOziORzuH+H+P7bSP2zJxRiwxJJRntzazmuHjnLDJY02t32WOLO+nOVvmk1b1yAf/OYfaWnvS3dIxqSdXbzPEEOjQb7y+GbOrC+zG/CyzOIZVVx/SSOPb9jPe//fs9y6dCHXNs+mvOjE/17JzKljnQBMtrPEkiG+8fQO9hwZ4MH/ucRGMs5CC6ZV8Mu/eSuffWQ9X3l8M/9n9TaWLp7OmVPLqS0rRAT6hsb4447DDI0GGR4NUVzgo6qkgPqKYubVl1FgM4SaHGGJJQPs6uznW0/vYNl5M7jkzLp0h2MmaGplMT+4cQkv7enm0Rf38sQrB/nJy/tOKFcU8FEU8DE4GmQ06A2pVOAX5k+t4E2NNYRCis+G6zdZzBJLmg2NBvnsw+spDPj4+3efne5wzCkSES6cU8OFc2r4lz87h6HRIIf7RwCoKA6wav3+YxOJqSqDo0HaugbZerCXTft72XKgl2e3d/LxtzTywQtnUVpo/0VN9rFBKNPkwTV7CKny0No9bN7fy0eWnMHiGXanfTZL5tpIvGsswZDy
6r4eth7sZUNbD5XFAa5bcgbXv7mRGdUlqQzVmAlJdhBK+zmUJqrK6lcPsml/L+9+w3RLKjkgmQvz8fh9wrmzq/naB9/IS3u6uPfZXfz3H3bynWd2cfUbpnNt82zePK/WrsGZjGeJJQ1GgyEeW7+fta1HWDJ3Cm+Zb9dVzHHe6bQpXDhnCm1dA3z/+d08tHYPP994gIriAJfOr+Pc2dWcM6uKN86ssvl6TMaxU2GTrGdglFseeolntndyWVM971o87dg5d2NiGQ2G2NF+lE37e9nZeZSugePDyNSVFzGjupjplcVMr/L+rSop4KMXz0ljxCYX2amwDPTLVw7w5VWb6Oof4c/On0mzDTJpklTg97GwoZKFDZUA9A+Psa97kLauQfZ1DbDnyAAb23qOlS8u8PGz9ftYOL2ShQ0VLJxewYJpFXZ0YyZFUolFRJYC/wH4ge+o6tci1hcB3wcuBA4DH1bVVrfuduBGIAj8taqujleniMwFVgJTgJeAj6nqyETayBQb9nbzH7/bzpNb21nUUMl3b3jT674EjDlZZUUBFkzzksW4odEgh3qHONAzxKHeIYIh5Wcv76PvT8eHmplVU8LC6ZXMrStlRnUJM6pLmOn+rSktsBEfTEokTCwi4gfuBt4JtAEviMgqVd0cVuxGoEtV54vIcuAu4MNu7vnlwGJgBvBbEVngtolV513AClVdKSLfcnV/82TbUNXgqeyYU9U7NMrvthzixy/u49mWTqpKCrjt6oXceOlcCvw+Sywm5YoL/MypLWNObdmxZapK9+Aoh3qGONjr/W1s6+bpbe2MhTRie9/xRFNV4hJPMTOrS2ioLqGhqpjiAv+E4xsaDdI9MErf0Ki7hydEwOejqMBHod9HUYGf4oCPsqIARQGfJbkslswRy0VAi6ruBBCRlcAyIDyxLAP+0T1+FPgv8T4Vy4CVqjoM7BKRFlcf0eoUkS3AFcBHXJn7Xb3fnEAbzye5D05aKKSMBEPeHdRjIY4Oj3Gox/uluOVALxv39bB+TzcjwRANVcXcfvVCPnrxnKhDfBhzOokINaWF1JQWHjuNBl7C6R8J0jMwSvfgCN0Do/QMjtI9OEprZz/r93bTN3TioJpTygqpLi2gprSQ6pICqksLKS/yE/D78AkMjAQZGAlydHiMgZEx+obGONI/Qlf/CP0jyf/WC/iEsqIA5UUByor8lBcFKC0M4PcJPgGfyLGbSEMhJahKMHT872DvEKoQUkUVAn6hpMBPSYGf4kLv37c21VFd6r2G6pICqkoKKC7wU1zgP3YTa+AURkNQVUIKY6EQoRBejEEv1qNDY/QMjtI75O33nsFRntraTv/wGAMjQfpHxhgcCTLmXk/AJxQGfMytK6O0KEB5YYCq0oIT3ouasgKqSwopLvDh9wkBn/dvgV8mNVEn8003E9gb9rwNWBKrjKqOiUgPUOuW/yli2/GBsKLVWQt0q+pYlPITaSOlNuzt5tpvPc9IMBSzTFHAx6IZlVx/yRyWvqGB82dX213UJuOICOXui3tmTfR7ZMaCIXqHxugeGKF7cJTuAe+LcHAkSPfACPu7BxkYCTIyFiKkSkj12JFHod9HYcBHcYGP+vIi5kwppcwlh+ICHwV+7wsvGFL35RliLKiMBkMMj4X9uR9vvYNjdPQNo3jJLRjyvrRV1X2BeonGL2H/+r0kJAgjwRA9g6Mc7B1icMSr88mt7UnsJxC3v+R1y9wKxtd7yxQlFHLJZAL9ogr9PkqL/JQVBigp9FPmk2P7aXgsxOH+EXYfGaB/eIzugVGGx2J/F0XyCQR8Pt57TgNf//B5Jx/cSUgmsUT7VozcZbHKxFoe7WdAvPITaeP1AYrcBNzknh4VkW1RtpuoOqBz/MlrwM+AL6awgQl4XUwZJBPjysSYIDPjysSYIDPjysSYWAF1K5ZPOK6kuhomk1jagNlhz2cB+2OUaRORAFAFHEmwbbTlnUC1iATcUUt4+Ym0cYyq3gPck8TrPWki
si6ZLniTKRNjgsyMKxNjgsyMKxNjgsyMKxNjgsmJK5kTiC8ATSIyV0QK8S6Ur4ooswq43j3+EPCkejfIrAKWi0iR6+3VBKyNVafb5ilXB67OxybYhjHGmDRIeMTirmfcAqzG6xp8n6puEpE7gHWqugq4F3jAXTg/gpcocOUewbvQPwbcPN5bK1qdrslbgZUi8lXgZVc3E2nDGGPM5MvLO+9TTURucqfaMkYmxgSZGVcmxgSZGVcmxgSZGVcmxgSTE5clFmOMMSllw6QaY4xJLVW1vwn+AUuBbUALcFsK670PaAdeDVs2BfgNsN39W+OWC/CfLoaNwAVh21zvym8Hrg9bfiHwitvmPzl+5Bq1DbduNl7Hii3AJuBv0h0XUIzXUWODi+krbvlcYI0r/zBQ6JYXuectbn1jWNu3u+XbgKsSvcex2ghb78e7RvjzDIqp1e3f9XjXR9P6/rl11Xg3PG/F+2y9OQNiOsvto/G/XuAzGRDXZ/E+568CD+F9/tP+uYr6HTYZX8C5+If3xbEDmAcU4n25LUpR3ZcBF/D6xPKv4282cBtwl3v8buCX7sN9MbAm7AO60/1b4x6P/0dYi/cfWNy2V8drwz1vGP8PA1Tg3a6zKJ1xuXLl7nGB+/BfDDwCLHfLvwX8lXv8aeBb7vFy4GH3eJF7/4rcf6Id7v2N+R7HaiNsf30OeJDjiSUTYmoF6iKWpftzdT/wSfe4EC/RpDWmKP/PD+Ldv5HOz/pMYBdQEvZe3xDrPWcSP1dR99tkfyHnyp/7UKwOe347cHsK62/k9YllG9DgHjcA29zjbwPXRZYDrgO+Hbb8225ZA7A1bPmxcrHaiBHfY3hjvWVEXEAp3qClS/DuhwpEvk94vRDf7B4HXDmJfO/Gy8V6j902Udtwz2cBv8Mbnujn8cpPVkxuWSsnJpa0vX9AJd6XpWRKTFE+V+8Cnkt3XBwfeWSK+5z8HLgq1nvOJH6uov3ZNZaJizbUzWkZSsaZpqoHANy/UxPEEW95W5Tl8dp4HRFpBM7HO0JIa1wi4heR9XinDn+D96srqWGBgPBhgU4m1nhDDwH8O/C/gfHxNpIequg0xgTeiBS/FpEX3UgUkN73bx7QAXxXRF4Wke+ISFmaY4q0HO+0U7xtTntcqroP+DdgD3AA73PyIpnxuTqBJZaJS2oomUlwskPdnFLcIlIO/Bj4jKr2pjsuVQ2q6nl4RwkXAWfHqSdVMcWMVUTeC7Sr6oth61I5VNGp7L+3qOoFwNXAzSJyWZRtxk3G+xfAO+X7TVU9H+jHO/2TzpiON+bdvP1+4EeJip7uuESkBm/A3bl4o7iX4b2PseqZzM/VCSyxTFxSQ8mk0CERaQBw/46PoBcrjnjLZ0VZHq8N3LICvKTyQ1X9SabEBaCq3cDTeOe4q92wP5H1HGs7yWGBYi0/NvRQlDbeArxfRFrx5hW6Au8IJp0xje+j/e7fduCneIk4ne9fG9Cmqmvc80fxEk1GfKbwvrhfUtVDSbyO0x3XO4BdqtqhqqPAT4BLyIDPVTSWWCYumaFuUil8SJvref1QN38hnouBHncIvRp4l4jUuF8778I7N3oA6BORi920A39B9GFzwtvAlb0X2KKqX8+EuESkXkSq3eMSvP98W0jdsEAnPfSQqt6uqrNUtdGVf1JVP5rOmNz+KRORivHHbr+/ms73T1UPAntF5Cy37kq8ETTS+lkPcx3HT4PF22Yy4toDXCwipW6b8X2V1s9VTIkuwthf3Avs78brHbUD+PsU1vsQ3nnUUbxfEjfinev8HV6Xv98BU1xZwZs0bQde98XmsHo+gdd1sAX4eNjyZrwvlR3Af3G8q2PUNty6S/EOgTdyvBvmu9MZF3AOXpfejW67L7vl89x/lha80xhFbnmxe97i1s8La/vvXbvbcD104r3HsdqIeB/fxvFeYWmNya3bwPGu2X+fYN9O1ufqPGCdew9/htd7Kq0xufWleDPVVoUtS/e++gpet+xX
gQfwenZlxGc98s/uvDfGGJNSdirMGGNMSlliMcYYk1KWWIwxxqSUJRZjjDEpZYnFGGNMSlliMa8jIioi893jb4nIl9IdUzgR+Z54s4tOdrsfEJG9InJURM6f7PZzkduX8ya47bHPaZR1T4vIJ08tOnMqEk5NbDKTu7N7BjBDVTvDlq8HzgXmqmrrqbShqp86le1zzL8Bt6hq4pvDTFJUtTzdMZjTw45YstsuvLuDARCRNwIl6Qsnp83Bu7Ew44UNv2FMWlhiyW4P4A0HMe564PvhBdzQDf8mIntE5JA7vVUStv5vReSAiOwXkU9EbHvstJMbluLnItIhIl3u8aywsk+LyD+JyHMi0icivxaRumhBi8gW8QZrHH8eEJFOEbnAPf+RiBwUkR4R+YOILI5Rzw0i8mzEsvBTeXFfe8R2PhH5oojsFpF2Efm+iFS5Oo7izVexQUR2xNj+EhF5wcX8gohc4pa/XUReCSv3WxFZG/b8WRG5xj1uFZEviMhGV8/DIlIcVva9IrJeRLpF5I8ick7YulYRuVVENgL90ZKLey3/7t7r/e5xUdj6Za7+XhHZISJL3fIpIvJdt02XiPwsyf3/PbfPf+M+E78XkTkTea/ifU5jmBPrsygi7xeRTW4/Pi0iZ4eta3VtbRSRfhG5V0SmicgvXV2/FW94lvHyF7v3oltENojI28LW3SAiO912u0Tko0nEnRtO97An9nd6/vDm1ngH3rAMZ+N98e3F+2WtuBnj8AZAXIU3j0MF8DjwL27dUuAQ8Aa80VIfdNvOd+u/B3zVPa4FPog31EUF3tAOPwuL52m8oSAW4B01PQ18LUbsX8YbyHL8+Xt4/fwUn3BtFLn414etC4/pBuDZiLrD44/52qPEND70xjygHG+Qvwei1Rtl2ylAF/AxvNPL17nntXhDawwCdW7dQbxB/CrcfhoEasPe07V4pzin4I179im37gK8AQmXuPf6ele+KGzb9XgDCZbEiPMO4E94Q7HXA38E/smtuwhvaPV34v3gnAksdOt+gTdzYA3ehGqXJ7n/vwf04U1cVwT8R3j5ZN8rEnxOo7zOp4nxWXTL+t3rLMCb3qCF4zMvtrp9NM3tg3a8eX7Od6/hSeAfXNmZeMO+vNvts3e65/Uuzl7gLFe2AVic7u+NSft+SncA9jfBN+54Yvki8C/uP99v8L68FG+iMHH/ic4M2+7NeKOkgjcF8tfC1i2I8sXw1Rjtnwd0hT1/Gvhi2PNPA7+Kse1894VT6p7/EDfOV5Sy1S6mqsiYiPPFlui1R2nnd8Cnw56fhTdWWyC83hjbfgxYG7HseeAG9/gZ4M/wRl7+Nd6MfEuBtwMbI97TPw97/q8cnwXwm7gkELZ+G8e/5FuBTyT4zOwA3h32/Cqg1T3+NrAiyjYNePPK1ERZF3P/h71XK8PWlQNBYPbJvFeJPqdR4or5WQS+BDwSts4H7APeFrYfPxq2/sd4w/qPP/9fuB9UwK2E/fhwy1bjJf0yoBvvx1jURJ/Lf3YuNvs9APwBb56G70esq8c7wnhR5Ni0CoL3ixe8X8bh84bsjtWIiJQCK/C+EMdPBVSIiF9Vg+75wbBNBvC+SE6gqi0isgV4n4g8jjfnxfmuHT9wJ3Cti398sqw6vF/UyUr02iPN4PWvfzdekp6G98UTT+S249uPT4j0e7wBKdvc4y7gcmDYPQ8XuQ9nuMdzgOtF5H+FrS8MWw9hEzW50y7fdk+fUdWro8S5O2z72cATUV7bbOCIqnZFWZeMYzGp6lEROeLaDJ9UKmWf0zCxPouv2weqGhKRvbx+8qpDYY8Hozwfr2sOcK2IvC9sfQHwlKr2i8iHgS8A94rIc8DnVXVrErFnPbvGkuVUdTfeRfx3452+CdeJ9x9hsapWu78qPd4b5wCvn4PhjDhNfR7vV/wSVa3EO70B0ScCSsZDeKeMlgGbVbXFLf+IW/YOvDkkGuO004/3heQVEJketi7Ra4+0H++LYtwZwBiv
/1KJJXLb8e3HE9J4YrnMPf49XmK5nBMTSyx7gTvDXku1qpaqaviw7sdGlFXVH6pqufsbnxAq2mscn1tjL3BmjHaniJueIEK8/T9udtj6crxTXZHzeaTyc5rI6/aBeJlsNol/PESzF++IJfw9KVPVrwGo6mpVfSduKmLgv08h7qxiiSU33Ahcoar94QtVNYT3YV4hIlMBRGSmiFzlijwC3CAii9wRyT/EaaMC7z9/t4hMSVA2GSvx5qf4K7xz5uHtDOOdqy4F/jlOHRuAxSJynrvI/Y/jK5J47ZEeAj4r3nwU5a7dh/X4lKzxPAEsEJGPiNcR4cPAIrx5ycG7lnEW3nWMtaq6Ce/LbQne0WYy/hv4lIgsEU+ZiLxH3BwrSXoI+KJ489jU4V3r+oFbdy/wcRG5UryODDNFZKF6c4f8EviGeB04CuT4zJMx93+Yd4vIpeLN8fFPwBpVDT9aSfXnNJFHgPe411mA94NpGO89Olk/wDvqvkq8KbKLReRtIjLLXfB/v3hz3wwDR/FOA+YFSyw5QFV3qOq6GKtvxbs4+ScR6QV+i/clh6r+Eu+i6ZOuzJNxmvl3vAuhnXgXN391ijEfwLsOcQneheFx38c7VbEPbyKjP8Wp4zW8C9K/xZu/4tmIIjFfexT3cfy04i5gCO98ejKv5TDwXrwvqcN4F4Tfq+7+IpfwXwI2qeqI2+x5YLd6szkm08Y64H/izd3R5V7XDclsG+arHJ/75BUX01dd/WuBj+Od7uzBO5Ia/2X/MbzrTVvxLmZ/xm2TaP+D96PhH/BmL7wQiNUzKlWf07hUdRvw58D/w/ssvw94X9j7cjJ17cU7uv47oAPvCOZv8b5XfXifh/14r/1yvGs9ecHmYzHGnBYi8j28qYe/mO5YzOSyIxZjjDEpZYnFGGNMStmpMGOMMSllRyzGGGNSyhKLMcaYlLLEYowxJqUssRhjjEkpSyzGGGNSyhKLMcaYlPr/Yu/BjfumtJAAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7ff49a011588>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "\"\\n\\n#接下来 查看特征之间的相关性，去掉一部分冗余特征\\ndata_corr = train.corr().abs()\\nplt.subplots(figsize=(13, 9))\\nsns.heatmap(data_corr,annot=True)\\n\\n# Mask unimportant features\\nsns.heatmap(data_corr, mask=data_corr < 1, cbar=False)\\n\\nplt.savefig('data.png' )\\nplt.show()\""
      ]
     },
     "execution_count": 139,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# pd.isnull(train) was run and showed no remaining null values, so move on to the next step.\n",
    "# Histogram / distribution of the target y (house sale price).\n",
    "fig = plt.figure()\n",
    "sns.distplot(train.SalePrice.values, bins=30, kde=True)\n",
    "# Label the axis with the column that is actually plotted.\n",
    "plt.xlabel('SalePrice', fontsize=12)\n",
    "plt.show()\n",
    "\n",
    "# Disabled experiment, kept as comments instead of a trailing triple-quoted string:\n",
    "# a bare string as the last expression is echoed by Jupyter as the cell's result.\n",
    "#\n",
    "# Next, inspect the correlations between features and drop some redundant ones.\n",
    "# data_corr = train.corr().abs()\n",
    "# plt.subplots(figsize=(13, 9))\n",
    "# sns.heatmap(data_corr,annot=True)\n",
    "#\n",
    "# # Mask unimportant features\n",
    "# sns.heatmap(data_corr, mask=data_corr < 1, cbar=False)\n",
    "#\n",
    "# plt.savefig('data.png' )\n",
    "# plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the features X from the target y.\n",
    "X = train.drop('SalePrice', axis = 1)\n",
    "y = train['SalePrice']\n",
    "\n",
    "# Keep the column names: X is replaced by the imputer's output below.\n",
    "cols = X.columns\n",
    "\n",
    "# Fill missing values with the per-column mean.\n",
    "# sklearn.preprocessing.Imputer was deprecated in scikit-learn 0.20 and removed in 0.22,\n",
    "# so prefer its replacement SimpleImputer and fall back only on older installations.\n",
    "# NOTE(review): the means are computed on all rows before the train/test split,\n",
    "# so held-out rows influence the imputed values - consider fitting on the training rows only.\n",
    "try:\n",
    "    from sklearn.impute import SimpleImputer\n",
    "    imp = SimpleImputer(missing_values=np.nan, strategy='mean')\n",
    "except ImportError:\n",
    "    from sklearn.preprocessing import Imputer\n",
    "    imp = Imputer(missing_values='NaN', strategy='mean', axis=0)\n",
    "X = imp.fit_transform(X)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1168, 74)"
      ]
     },
     "execution_count": 141,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Randomly sample 20% of the data as the test set; the rest is the training set.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.2, random_state=33\n",
    ")\n",
    "X_train.shape\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/lyp/.local/lib/python3.6/site-packages/ipykernel_launcher.py:27: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead\n",
      "/home/lyp/.local/lib/python3.6/site-packages/sklearn/utils/validation.py:475: DataConversionWarning: Data with input dtype int64 was converted to float64 by StandardScaler.\n",
      "  warnings.warn(msg, DataConversionWarning)\n",
      "/home/lyp/.local/lib/python3.6/site-packages/ipykernel_launcher.py:28: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead\n"
     ]
    }
   ],
   "source": [
    "# Next: feature scaling.\n",
    "# Disabled alternative (min-max scaling), kept as comments for reference.\n",
    "# The test features use transform() so they are scaled with the range fitted on the training set.\n",
    "#\n",
    "# from sklearn.preprocessing import MinMaxScaler\n",
    "#\n",
    "# ms_X = MinMaxScaler()\n",
    "# ms_y = MinMaxScaler()\n",
    "#\n",
    "# X_train = ms_X.fit_transform(X_train)\n",
    "# X_test = ms_X.transform(X_test)\n",
    "#\n",
    "# #y_train = ms_y.fit_transform(np.asarray(y_train).reshape(-1, 1))\n",
    "# #y_test = ms_y.transform(np.asarray(y_test).reshape(-1, 1))\n",
    "\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# Separate standardizers for the features and for the target.\n",
    "ss_X = StandardScaler()\n",
    "ss_y = StandardScaler()\n",
    "\n",
    "# Fit on the training data only, then apply the same scaling to the test data.\n",
    "X_train = ss_X.fit_transform(X_train)\n",
    "X_test = ss_X.transform(X_test)\n",
    "\n",
    "# Standardizing y keeps the weights w on a similar scale across problems and\n",
    "# limits the range of the regularization parameter.\n",
    "# Series.reshape is deprecated in pandas (it raises a FutureWarning), so reshape the\n",
    "# underlying array instead; the float dtype avoids StandardScaler's int64 -> float64\n",
    "# DataConversionWarning.\n",
    "y_train = ss_y.fit_transform(np.asarray(y_train, dtype=float).reshape(-1, 1))\n",
    "y_test = ss_y.transform(np.asarray(y_test, dtype=float).reshape(-1, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>coef</th>\n",
       "      <th>columns</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>[989406709202.9279]</td>\n",
       "      <td>GrLivArea</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>[102106096281.66698]</td>\n",
       "      <td>BsmtUnfSF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>[100697160749.14917]</td>\n",
       "      <td>BsmtFinSF1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>[35378892238.06502]</td>\n",
       "      <td>BsmtFinSF2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>[0.15077991024625795]</td>\n",
       "      <td>OverallQual</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>[0.1290074217909695]</td>\n",
       "      <td>Neighborhood</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>[0.08148462175603147]</td>\n",
       "      <td>OverallCond</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>[0.08100509643554688]</td>\n",
       "      <td>TotRmsAbvGrd</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>[0.07706832885742188]</td>\n",
       "      <td>KitchenQual</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>[0.06755687305548719]</td>\n",
       "      <td>BsmtExposure</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72</th>\n",
       "      <td>[0.06613540649414062]</td>\n",
       "      <td>SaleType</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>[0.06212719418597423]</td>\n",
       "      <td>BsmtQual</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>[0.05894333228392945]</td>\n",
       "      <td>ExterQual</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>[0.05656825274838995]</td>\n",
       "      <td>MasVnrArea</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>[0.05236740405216567]</td>\n",
       "      <td>YearBuilt</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>[0.04492641024395608]</td>\n",
       "      <td>RoofMatl</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>68</th>\n",
       "      <td>[0.0435943603515625]</td>\n",
       "      <td>PoolArea</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[0.03876744995460875]</td>\n",
       "      <td>LotArea</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>[0.03728628158569336]</td>\n",
       "      <td>Functional</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>[0.03388786315917969]</td>\n",
       "      <td>GarageArea</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>[0.033267974853515625]</td>\n",
       "      <td>GarageCars</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td>[0.029741287231445312]</td>\n",
       "      <td>SaleCondition</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>[0.025279998779296875]</td>\n",
       "      <td>Fireplaces</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>[0.024379931743965313]</td>\n",
       "      <td>RoofStyle</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>[0.023981571197509766]</td>\n",
       "      <td>ScreenPorch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>[0.02374929189682007]</td>\n",
       "      <td>HeatingQC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>[0.02362823486328125]</td>\n",
       "      <td>WoodDeckSF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[0.020465033906531094]</td>\n",
       "      <td>Street</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>[0.02035076596490302]</td>\n",
       "      <td>Utilities</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>[0.01818413784775888]</td>\n",
       "      <td>Condition1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>[-0.0008077621459960938]</td>\n",
       "      <td>FullBath</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>[-0.0038299560546875]</td>\n",
       "      <td>BsmtHalfBath</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>[-0.0039043426513671875]</td>\n",
       "      <td>GarageYrBlt</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>[-0.00470733642578125]</td>\n",
       "      <td>CentralAir</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>[-0.004713476595368249]</td>\n",
       "      <td>ExterCond</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>64</th>\n",
       "      <td>[-0.005405426025390625]</td>\n",
       "      <td>OpenPorchSF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>[-0.005432757077883821]</td>\n",
       "      <td>BsmtFinType1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>[-0.005644798278808594]</td>\n",
       "      <td>BsmtFinType2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>[-0.0059644319117069244]</td>\n",
       "      <td>GarageQual</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>[-0.007184991277475818]</td>\n",
       "      <td>HouseStyle</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>69</th>\n",
       "      <td>[-0.008213043212890625]</td>\n",
       "      <td>MiscVal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62</th>\n",
       "      <td>[-0.00846242904663086]</td>\n",
       "      <td>PavedDrive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[-0.010452139070132576]</td>\n",
       "      <td>MSZoning</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70</th>\n",
       "      <td>[-0.019323348999023438]</td>\n",
       "      <td>MoSold</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>[-0.019428126659487677]</td>\n",
       "      <td>MasVnrType</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>[-0.01959286976338194]</td>\n",
       "      <td>LandSlope</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>[-0.02009105682373047]</td>\n",
       "      <td>Electrical</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>[-0.023493560590905516]</td>\n",
       "      <td>YearRemodAdd</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>[-0.025156021118164062]</td>\n",
       "      <td>GarageCond</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>[-0.028501510620117188]</td>\n",
       "      <td>GarageType</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>[-0.035132860542557154]</td>\n",
       "      <td>BsmtCond</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>[-0.05123680276528571]</td>\n",
       "      <td>BldgType</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>[-0.053168073919913614]</td>\n",
       "      <td>Condition2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>[-0.06007194519042969]</td>\n",
       "      <td>BedroomAbvGr</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>[-0.07219600677490234]</td>\n",
       "      <td>KitchenAbvGr</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[-0.09524908856023656]</td>\n",
       "      <td>MSSubClass</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>[-95121837140.77956]</td>\n",
       "      <td>LowQualFinSF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>[-98447108602.9453]</td>\n",
       "      <td>TotalBsmtSF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>[-722978983982.5352]</td>\n",
       "      <td>1stFlrSF</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>[-842274449793.7386]</td>\n",
       "      <td>2ndFlrSF</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>74 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                        coef        columns\n",
       "44       [989406709202.9279]      GrLivArea\n",
       "35      [102106096281.66698]      BsmtUnfSF\n",
       "32      [100697160749.14917]     BsmtFinSF1\n",
       "34       [35378892238.06502]     BsmtFinSF2\n",
       "15     [0.15077991024625795]    OverallQual\n",
       "10      [0.1290074217909695]   Neighborhood\n",
       "16     [0.08148462175603147]    OverallCond\n",
       "52     [0.08100509643554688]   TotRmsAbvGrd\n",
       "51     [0.07706832885742188]    KitchenQual\n",
       "30     [0.06755687305548719]   BsmtExposure\n",
       "72     [0.06613540649414062]       SaleType\n",
       "28     [0.06212719418597423]       BsmtQual\n",
       "25     [0.05894333228392945]      ExterQual\n",
       "24     [0.05656825274838995]     MasVnrArea\n",
       "17     [0.05236740405216567]      YearBuilt\n",
       "20     [0.04492641024395608]       RoofMatl\n",
       "68      [0.0435943603515625]       PoolArea\n",
       "3      [0.03876744995460875]        LotArea\n",
       "53     [0.03728628158569336]     Functional\n",
       "59     [0.03388786315917969]     GarageArea\n",
       "58    [0.033267974853515625]     GarageCars\n",
       "73    [0.029741287231445312]  SaleCondition\n",
       "54    [0.025279998779296875]     Fireplaces\n",
       "19    [0.024379931743965313]      RoofStyle\n",
       "67    [0.023981571197509766]    ScreenPorch\n",
       "38     [0.02374929189682007]      HeatingQC\n",
       "63     [0.02362823486328125]     WoodDeckSF\n",
       "4     [0.020465033906531094]         Street\n",
       "7      [0.02035076596490302]      Utilities\n",
       "11     [0.01818413784775888]     Condition1\n",
       "..                       ...            ...\n",
       "47  [-0.0008077621459960938]       FullBath\n",
       "46     [-0.0038299560546875]   BsmtHalfBath\n",
       "56  [-0.0039043426513671875]    GarageYrBlt\n",
       "39    [-0.00470733642578125]     CentralAir\n",
       "26   [-0.004713476595368249]      ExterCond\n",
       "64   [-0.005405426025390625]    OpenPorchSF\n",
       "31   [-0.005432757077883821]   BsmtFinType1\n",
       "33   [-0.005644798278808594]   BsmtFinType2\n",
       "60  [-0.0059644319117069244]     GarageQual\n",
       "14   [-0.007184991277475818]     HouseStyle\n",
       "69   [-0.008213043212890625]        MiscVal\n",
       "62    [-0.00846242904663086]     PavedDrive\n",
       "1    [-0.010452139070132576]       MSZoning\n",
       "70   [-0.019323348999023438]         MoSold\n",
       "23   [-0.019428126659487677]     MasVnrType\n",
       "9     [-0.01959286976338194]      LandSlope\n",
       "40    [-0.02009105682373047]     Electrical\n",
       "18   [-0.023493560590905516]   YearRemodAdd\n",
       "61   [-0.025156021118164062]     GarageCond\n",
       "55   [-0.028501510620117188]     GarageType\n",
       "29   [-0.035132860542557154]       BsmtCond\n",
       "13    [-0.05123680276528571]       BldgType\n",
       "12   [-0.053168073919913614]     Condition2\n",
       "49    [-0.06007194519042969]   BedroomAbvGr\n",
       "50    [-0.07219600677490234]   KitchenAbvGr\n",
       "0     [-0.09524908856023656]     MSSubClass\n",
       "43      [-95121837140.77956]   LowQualFinSF\n",
       "36       [-98447108602.9453]    TotalBsmtSF\n",
       "41      [-722978983982.5352]       1stFlrSF\n",
       "42      [-842274449793.7386]       2ndFlrSF\n",
       "\n",
       "[74 rows x 2 columns]"
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 线性回归\n",
    "#class sklearn.linear_model.LinearRegression(fit_intercept=True, normalize=False, copy_X=True, n_jobs=1)\n",
    "from sklearn.linear_model import LinearRegression\n",
    "\n",
    "# 使用默认配置初始化\n",
    "lr = LinearRegression()\n",
    "\n",
    "# 训练模型参数\n",
    "lr.fit(X_train, y_train)\n",
    "\n",
    "# 预测\n",
    "y_test_pred_lr = lr.predict(X_test)\n",
    "y_train_pred_lr = lr.predict(X_train)\n",
    "\n",
    "\n",
    "# 看看各特征的权重系数，系数的绝对值大小可视为该特征的重要性\n",
    "fs = pd.DataFrame({\"columns\":list(cols), \"coef\":list((lr.coef_.T))})\n",
    "fs.sort_values(by=['coef'],ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The r2 score of LinearRegression on test is 0.6896194465016323\n",
      "The r2 score of LinearRegression on train is 0.8749723612117979\n"
     ]
    }
   ],
   "source": [
    "# 使用r2_score评价模型在测试集和训练集上的性能，并输出评估结果\n",
    "#测试集\n",
    "print( 'The r2 score of LinearRegression on test is', r2_score(y_test, y_test_pred_lr))\n",
    "#训练集\n",
    "print('The r2 score of LinearRegression on train is', r2_score(y_train, y_train_pred_lr))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The r2 score of LassoCV on test is 0.7214302750254233\n",
      "The r2 score of LassoCV on train is 0.8670626158072632\n",
      "alpha is: 0.013\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/lyp/.local/lib/python3.6/site-packages/sklearn/linear_model/coordinate_descent.py:1094: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n"
     ]
    }
   ],
   "source": [
    "#### Lasso／L1正则\n",
    "# class sklearn.linear_model.LassoCV(eps=0.001, n_alphas=100, alphas=None, fit_intercept=True, \n",
    "#                                    normalize=False, precompute=’auto’, max_iter=1000, \n",
    "#                                    tol=0.0001, copy_X=True, cv=None, verbose=False, n_jobs=1,\n",
    "#                                    positive=False, random_state=None, selection=’cyclic’)\n",
    "from sklearn.linear_model import LassoCV\n",
    "\n",
    "#设置超参数搜索范围\n",
    "#alphas = [0.001, 0.01, 0.1, 1, 10]\n",
    "#alphas = [0.005, 0.007, 0.009, 0.01, 0.012]\n",
    "alphas = [0.011, 0.012, 0.013,0.014, 0.015]\n",
    "#alphas = [0.0118, 0.012, 0.0122]\n",
    "\n",
    "\n",
    "#生成一个LassoCV实例\n",
    "lasso = LassoCV(alphas=alphas)   \n",
    "\n",
    "#训练（内含CV）\n",
    "lasso.fit(X_train, y_train)  \n",
    "\n",
    "#测试\n",
    "y_test_pred_lasso = lasso.predict(X_test)\n",
    "y_train_pred_lasso = lasso.predict(X_train)\n",
    "\n",
    "\n",
    "# 评估，使用r2_score评价模型在测试集和训练集上的性能\n",
    "print( 'The r2 score of LassoCV on test is', r2_score(y_test, y_test_pred_lasso))\n",
    "print( 'The r2 score of LassoCV on train is', r2_score(y_train, y_train_pred_lasso))\n",
    "\n",
    "print ('alpha is:', lasso.alpha_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAe8AAAFsCAYAAADlrTG7AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAHddJREFUeJzt3XuUXWWZ5/HvQ6hObALBQIIhYUxAULlWsAjJIBiNCgoSnFHB5togsBB64V2BUapVpnWBuGREpqMoIChElIbxMg2dho4wCiZ0OgIBSQtIhRiKIJeACAnP/HF2hUNSlarU7eSt8/2sVeuc/Z537/3scwK/8+69z96RmUiSpHJs1egCJEnS5jG8JUkqjOEtSVJhDG9JkgpjeEuSVBjDW5KkwhjeUg8i4t6ImN3oOhopIt4fEY9GxJqImD6M610TEbv28NpJEXH7IK3n4Yh452AsSxpOhreaUnf/094wFDJzr8y8rZflTI2IjIith6jURrsIOCszx2bmv2/4YrXtz1VhuyIiLo6IUQNdabW+3w90OdJIZXhLW7At4EvB64F7e+mzX2aOBd4GHA2cPORVSU3O8JZ6UD86j4gZEbEoIp6JiFURcXHVbWH1+FQ1+pwVEVtFxP+IiEci4vGIuCoixtUt94TqtdUR8fkN1tMeEddHxNUR8QxwUrXuX0XEUxGxMiK+GRF/Vbe8jIiPRsSDEfFsRHwpInar5nkmIubX999gG7utNSJGR8QaYBTwHxHxn729X5m5HLgDaK1b/riIuLyqe0VEfLlrZB4Rb4iIf4uIpyPiiYi4boNtekP1fIeIuKnalruA3er6bbTnIyJui4iPVM93i4h/rd7rJyLimojYvof3oqfPWNriGN5S33wD+EZmbkctPOZX7YdUj9tXu3p/BZxU/b0d2BUYC3wTICL2BL4FHAtMAsYBkzdY11zgemB74BpgHfBxYEdgFjAH+OgG8xwGvAWYCXwGmFetYxdgb+DDPWxXt7Vm5l+q0TTURta7dT/7KyLiTcDBwPK65iuBtcAbgOnAu4GPVK99CbgZeC0wBfhfPSz6UuAFau/XyWzeyD6AfwB2Bt5M7f1o76FvT5+xtMUxvNXM/qkazT4VEU9RC9WevAS8ISJ2zMw1mfnrTfQ9Frg4M3+fmWuAc4BjqtHhB4D/k5m3Z+aLwBeADW8w8KvM/KfMfDkz/5yZizPz15m5NjMfBv6R2i7qel/NzGcy817gHuDmav1PA7+gFpybW2tf3R0RzwHLgNuo3seI2Al4D/CxzHwuMx8Hvg4cU833ErXd8jtn5guZudFJaNUo/b8DX6iWcQ+1LwR9kpnLM/OW6stIJ3AxG793XTbnM5YayvBWMzsqM7fv+mPj0Wy9U4A9gPsj4jcRccQm+u4MPFI3/QiwNbBT9dqjXS9k5vPA6g3mf7R+IiL2iIifRsQfq13p/5PaKLzeqrrnf+5meizd21StfbV/tfyjgQOBbar21wMtwMq6L0j/CEysXv8MtZHxXVE7s7+7EfWEqp769+SRbvp1KyImRsS11S77Z4Cr2fi967I5n7HUUIa31AeZ+WBmfpha8HwVuD4itmHjUTPAY9SCq8t/obbreBWwktouYgAi4jXADhuuboPpy4D7gd2rXbrnUgu9wbCpWvssa+YDv6K2NwFqgfsXYMe6L0nbZeZe1Tx/zMxTM3Nn4HTgW13Huet0VvXsskGNXZ6rHv+6ru11dc//gdr7uW/13h1HD+/dJj5jaYtjeEt9EBHHRcSEzHwZeKpqXkctXF6mdry4yw+Bj0fEtIgYS22kfF1mrqV2LPt9EfFfq5PI/p7eg3hb4BlgTXVc+YxB27BN19ofXwFOi4jXZeZKase0vxYR21Unx+0WEW8DiIgPRkTXF5k/UQvZdfULy8x1wE+A9oj46+qcgRPrXu8EVgDHRcSoavRef3x+W2ANtRMKJwOf7qnwTXzG
0hbH8Jb65jDg3uoM7G8Ax1THaZ8HLgDuqHYNzwS+C3yf2pnoD1E72ervAKpj0n8HXEttFP4s8Di1EWpPPgX8TdX328B1m+i7uXqstT8y87fAv/FKSJ4A/BVwH7WAvp7aiWcABwB3Vu/pTcDZmflQN4s9i9pu+T8CVwDf2+D1U6v1rQb2Av5f3Wt/T223/tPAz6h9EehJt5/xprdYaozI7G6vn6ThUI12n6K2S7y74JKkjTjyloZZRLyv2gW8DbUrmP0WeLixVUkqieEtDb+51E4UewzYndruWXeBSeozd5tLklQYR96SJBWm0Tc9AGDHHXfMqVOnNroMSZIaavHixU9k5oTe+m0R4T116lQWLVrU6DIkSWqoiOjTFQTdbS5JUmEMb0mSCmN4S5JUmC3imLckqXsvvfQSHR0dvPCCV2odScaMGcOUKVNoaWnp1/y9hndEjKF23ePRVf/rM/P8iJhG7frM44G7geMz88WIGA1cBbyF2rWGj67uQSxJ2kwdHR1su+22TJ06lYjBupmcGikzWb16NR0dHUybNq1fy+jLbvO/AO/IzP2AVuCw6uYLXwW+npm7U7vhwClV/1OAP2XmG4CvV/0kSf3wwgsvsMMOOxjcI0hEsMMOOwxob0qv4V3dp3dNNdlS/SXwDmp3CAK4Ejiqej63mqZ6fU74r06S+s3/hY48A/1M+3TCWnWf3CXUbl14C/CfwFN19/ztACZXzycDjwJUrz8N7NDNMk+LiEURsaizs3NAGyFJUjPp0wlrmbkOaI2I7YEbgDd316167O7rxEYXUM/MecA8gLa2Ni+wLkl90N4+/MsbNWoU++yzD2vXrmXatGl8//vfZ/vtt9/sdX3kIx/hE5/4BHvuueer2q+44goWLVrEN7/5zc1eJsDYsWNZs2ZN7x2B2bNnc9FFF9HW1ra+bdGiRVx11VVccskl/Vp/I2zWT8Uy8yngNmAmsH1EdIX/FGp3SILaKHwXgOr1ccCTg1GsJGn4veY1r2HJkiXcc889jB8/nksvvbRfy/nOd76zUXBvCdra2oY8uNetWzeoy+s1vCNiQjXiJiJeA7wTWAbcCnyg6nYicGP1/KZqmur1f/V2h5I0MsyaNYsVK1asn77wwgs54IAD2HfffTn//PMBeO655zj88MPZb7/92HvvvbnuuuuA2qi361LY3/ve99hjjz1429vexh133LF+eSeddBLXX3/9+umxY8cCsGbNGubMmcP+++/PPvvsw4033siGVq5cySGHHEJrayt77703v/zlL/u0TbfddhtHHHEEAO3t7Zx88snMnj2bXXfd9VWhfvXVVzNjxgxaW1s5/fTT1wfyGWecQVtbG3vttdf69wBql/7+4he/yFvf+lZ+9KMf9amWvurLbvNJwJURMYpa2M/PzJ9GxH3AtRHxZeDfgcur/pcD34+I5dRG3McMasWSpIZYt24dCxYs4JRTaj8uuvnmm3nwwQe56667yEyOPPJIFi5cSGdnJzvvvDM/+9nPAHj66adftZyVK1dy/vnns3jxYsaNG8fb3/52pk+fvsl1jxkzhhtuuIHtttuOJ554gpkzZ3LkkUe+6sSvH/zgBxx66KGcd955rFu3jueff75f23n//fdz66238uyzz/LGN76RM844g+XLl3Pddddxxx130NLSwkc/+lGuueYaTjjhBC644ALGjx/PunXrmDNnDkuXLmXfffddX/ftt9/erzo2pdfwzsylwEbvamb+HpjRTfsLwAcHpTpJUsP9+c9/prW1lYcffpi3vOUtvOtd7wJq4X3zzTevD941a9bw4IMPcvDBB/OpT32Kz372sxxxxBEcfPDBr1renXfeyezZs5kwoXbzrKOPPprf/e53m6whMzn33HNZuHAhW221FStWrGDVqlW87nWvW9/ngAMO4OSTT+all17iqKOOorW1tV/be/jhhzN69GhGjx7NxIkTWbVqFQsWLGDx4sUccMAB69+TiRMnAjB//nzmzZvH
2rVrWblyJffdd9/68D766KP7VUNvvDyqJGmTuo55P/LII7z44ovrj3lnJueccw5LlixhyZIlLF++nFNOOYU99tiDxYsXs88++3DOOefwxS9+caNl9vRTqa233pqXX355/fJffPFFAK655ho6OztZvHgxS5YsYaeddtrod9KHHHIICxcuZPLkyRx//PFcddVV/dre0aNHr38+atQo1q5dS2Zy4oknrt/WBx54gPb2dh566CEuuugiFixYwNKlSzn88MNfVdc222zTrxp64+VRJRWjr2daD/YZ2aoZN24cl1xyCXPnzuWMM87g0EMP5fOf/zzHHnssY8eOZcWKFbS0tLB27VrGjx/Pcccdx9ixY7niiitetZwDDzyQs88+m9WrV7Pddtvxox/9iP322w+oHSdevHgxH/rQh7jxxht56aWXgNqu94kTJ9LS0sKtt97KI49sfOfMRx55hMmTJ3Pqqafy3HPPcffdd3PCCScMyrbPmTOHuXPn8vGPf5yJEyfy5JNP8uyzz/LMM8+wzTbbMG7cOFatWsUvfvELZs+ePSjr3BTDW5IK0ugvJtOnT2e//fbj2muv5fjjj2fZsmXMmjULqJ1cdvXVV7N8+XI+/elPs9VWW9HS0sJll132qmVMmjSJ9vZ2Zs2axaRJk9h///3Xn/x16qmnMnfuXGbMmMGcOXPWj1yPPfZY3ve+99HW1kZraytvetObNqrttttu48ILL6SlpYWxY8f2OPI+/PDD119TfNasWZx55pm9bveee+7Jl7/8Zd797nfz8ssv09LSwqWXXsrMmTOZPn06e+21F7vuuisHHXRQ39/MAYgt4UTwtra27DoDUZJ60owj72XLlvHmN3d3aQ2VrrvPNiIWZ2ZbD7Os5zFvSZIKY3hLklQYw1uStnBbwuFNDa6BfqaGtyRtwcaMGcPq1asN8BGk637eY8aM6fcyPNtckrZgU6ZMoaOjA+++OLKMGTOGKVOm9Ht+w1uStmAtLS1Mmzat0WVoC2N4SxpxNuenYiPpZ2VqHh7zliSpMIa3JEmFMbwlSSqM4S1JUmEMb0mSCmN4S5JUGMNbkqTCGN6SJBXG8JYkqTCGtyRJhTG8JUkqjOEtSVJhDG9JkgpjeEuSVBjDW5KkwhjekiQVxvCWJKkwhrckSYUxvCVJKozhLUlSYQxvSZIKY3hLklQYw1uSpMIY3pIkFcbwliSpMIa3JEmFMbwlSSqM4S1JUmEMb0mSCmN4S5JUGMNbkqTCGN6SJBWm1/COiF0i4taIWBYR90bE2VV7e0SsiIgl1d976+Y5JyKWR8QDEXHoUG6AJEnNZus+9FkLfDIz746IbYHFEXFL9drXM/Oi+s4RsSdwDLAXsDPwLxGxR2auG8zCJUlqVr2OvDNzZWbeXT1/FlgGTN7ELHOBazPzL5n5ELAcmDEYxUqSpM085h0RU4HpwJ1V01kRsTQivhsRr63aJgOP1s3WQTdhHxGnRcSiiFjU2dm52YVLktSs+hzeETEW+DHwscx8BrgM2A1oBVYCX+vq2s3suVFD5rzMbMvMtgkTJmx24ZIkNas+hXdEtFAL7msy8ycAmbkqM9dl5svAt3ll13gHsEvd7FOAxwavZEmSmltfzjYP4HJgWWZeXNc+qa7b+4F7quc3AcdExOiImAbsDtw1eCVLktTc+nK2+UHA8cBvI2JJ1XYu8OGIaKW2S/xh4HSAzLw3IuYD91E7U/1MzzSXJGnw9BremXk73R/H/vkm5rkAuGAAdUmSpB54hTVJkgpjeEuSVBjDW5KkwhjekiQVxvCWJKkwhrckSYUxvCVJKozhLUlSYQxvSZIKY3hLklQYw1uSpMIY3pIkFcbwliSpMIa3JEmFMbwlSSqM4S1JUmEMb0mSCmN4S5JUGMNbkqTCGN6SJBXG8JYkqTCGtyRJhTG8JUkqjOEtSVJhDG9JkgpjeEuSVBjDW5KkwhjekiQVxvCWJKkwhrckSYUxvCVJKozhLUlSYQxvSZIK
Y3hLklQYw1uSpMIY3pIkFcbwliSpMIa3JEmFMbwlSSqM4S1JUmEMb0mSCmN4S5JUmF7DOyJ2iYhbI2JZRNwbEWdX7eMj4paIeLB6fG3VHhFxSUQsj4ilEbH/UG+EJEnNpC8j77XAJzPzzcBM4MyI2BP4HLAgM3cHFlTTAO8Bdq/+TgMuG/SqJUlqYr2Gd2auzMy7q+fPAsuAycBc4Mqq25XAUdXzucBVWfNrYPuImDTolUuS1KQ265h3REwFpgN3Ajtl5kqoBTwwseo2GXi0braOqm3DZZ0WEYsiYlFnZ+fmVy5JUpPqc3hHxFjgx8DHMvOZTXXtpi03asicl5ltmdk2YcKEvpYhSVLT61N4R0QLteC+JjN/UjWv6todXj0+XrV3ALvUzT4FeGxwypUkSX052zyAy4FlmXlx3Us3ASdWz08EbqxrP6E663wm8HTX7nVJkjRwW/ehz0HA8cBvI2JJ1XYu8BVgfkScAvwB+GD12s+B9wLLgeeBvx3UiiVJanK9hndm3k73x7EB5nTTP4EzB1iXpCbS3t7oCqSyeIU1SZIKY3hLklQYw1uSpMIY3pIkFcbwliSpMIa3JEmFMbwlSSqM4S1JUmEMb0mSCmN4S5JUGMNbkqTCGN6SJBXG8JYkqTCGtyRJhTG8JUkqjOEtSVJhDG9JkgpjeEuSVBjDW5KkwhjekiQVxvCWJKkwhrckSYUxvCVJKozhLUlSYQxvSZIKY3hLklQYw1uSpMIY3pIkFcbwliSpMIa3JEmFMbwlSSqM4S1JUmEMb0mSCmN4S5JUGMNbkqTCbN3oAiSpkdrbB7efNBwceUuSVBjDW5KkwhjekiQVxvCWJKkwhrckSYUxvCVJKkyv4R0R342IxyPinrq29ohYERFLqr/31r12TkQsj4gHIuLQoSpckqRm1ZeR9xXAYd20fz0zW6u/nwNExJ7AMcBe1TzfiohRg1WsJEnqQ3hn5kLgyT4uby5wbWb+JTMfApYDMwZQnyRJ2sBAjnmfFRFLq93qr63aJgOP1vXpqNokSdIg6W94XwbsBrQCK4GvVe3RTd/sbgERcVpELIqIRZ2dnf0sQ5Kk5tOv8M7MVZm5LjNfBr7NK7vGO4Bd6rpOAR7rYRnzMrMtM9smTJjQnzIkSWpK/QrviJhUN/l+oOtM9JuAYyJidERMA3YH7hpYiZIkqV6vdxWLiB8Cs4EdI6IDOB+YHRGt1HaJPwycDpCZ90bEfOA+YC1wZmauG5rSJUlqTr2Gd2Z+uJvmyzfR/wLggoEUJUmSeuYV1iRJKozhLUlSYQxvSZIKY3hLklQYw1uSpMIY3pIkFcbwliSpMIa3JEmFMbwlSSqM4S1JUmEMb0mSCmN4S5JUGMNbkqTCGN6SJBXG8JYkqTCGtyRJhTG8JUkqjOEtSVJhDG9JkgpjeEuSVBjDW5KkwhjekiQVxvCWJKkwhrckSYUxvCVJKozhLUlSYQxvSZIKY3hLklQYw1uSpMIY3pIkFcbwliSpMIa3JEmFMbwlSSqM4S1JUmEMb0mSCmN4S5JUGMNbkqTCGN6SJBXG8JYkqTCGtyRJhTG8JUkqjOEtSVJhDG9JkgrTa3hHxHcj4vGIuKeubXxE3BIRD1aPr63aIyIuiYjlEbE0IvYfyuIlSWpGfRl5XwEctkHb54AFmbk7sKCaBngPsHv1dxpw2eCUKUmSuvQa3pm5EHhyg+a5wJXV8yuBo+rar8qaXwPbR8SkwSpWkiT1/5j3Tpm5EqB6nFi1TwYerevXUbVtJCJOi4hFEbGos7Ozn2VIktR8BvuEteimLbvrmJnzMrMtM9smTJgwyGVIkjRy9Te8V3XtDq8eH6/aO4Bd6vpNAR7rf3mSJGlD/Q3vm4ATq+cnAjfWtZ9QnXU+E3i6a/e6JEkaHFv31iEifgjMBnaMiA7gfOArwPyIOAX4A/DBqvvPgfcCy4Hngb8dgpolSWpqvYZ3Zn64h5fmdNM3gTMHWpQkSeqZV1iTJKkwhrck
SYUxvCVJKozhLUlSYQxvSZIKY3hLklQYw1uSpMIY3pIkFcbwliSpML1eYU2S+qu9vdEVSCOTI29JkgpjeEuSVBjDW5KkwhjekiQVxvCWJKkwhrckSYUxvCVJKozhLUlSYQxvSZIKY3hLklQYw1uSpMIY3pIkFcbwliSpMIa3JEmFMbwlSSqM4S1JUmEMb0mSCmN4S5JUGMNbkqTCGN6SJBXG8JYkqTCGtyRJhTG8JUkqjOEtSVJhtm50AZJUgvb2we0nDYQjb0mSCmN4S5JUGMNbkqTCGN6SJBXG8JYkqTCGtyRJhTG8JUkqzIB+5x0RDwPPAuuAtZnZFhHjgeuAqcDDwIcy808DK1OSJHUZjJH32zOzNTPbqunPAQsyc3dgQTUtSZIGyVDsNp8LXFk9vxI4agjWIUlS0xpoeCdwc0QsjojTqradMnMlQPU4sbsZI+K0iFgUEYs6OzsHWIYkSc1joNc2PygzH4uIicAtEXF/X2fMzHnAPIC2trYcYB2SJDWNAY28M/Ox6vFx4AZgBrAqIiYBVI+PD7RISZL0in6Hd0RsExHbdj0H3g3cA9wEnFh1OxG4caBFSpKkVwxkt/lOwA0R0bWcH2Tm/42I3wDzI+IU4A/ABwdepiRJ6tLv8M7M3wP7ddO+GpgzkKIkSVLPvMKaJEmFMbwlSSqM4S1JUmEMb0mSCmN4S5JUGMNbkqTCGN6SJBXG8JYkqTCGtyRJhTG8JUkqjOEtSVJhDG9JkgpjeEuSVBjDW5KkwhjekiQVxvCWJKkwhrckSYUxvCVJKozhLUlSYQxvSZIKY3hLklSYrRtdgKSytLc3ugJJhrckDaK+frnxS5AGwt3mkiQVxvCWJKkwhrckSYUxvCVJKozhLUlSYQxvSZIKY3hLklQYw1uSpMIY3pIkFcbwliSpMIa3JEmFMbwlSSqM4S1JUmG8q5gkwLtcSSVx5C1JUmEMb0mSCmN4S5JUGI95SyOcx7KlkcfwlqQG2JwvVX4B04bcbS5JUmGGbOQdEYcB3wBGAd/JzK8M1bokSX0foTfrSH4kvT9DEt4RMQq4FHgX0AH8JiJuysz7hmJ9UrMp4X8uGjyD/XmPpBBrVkM18p4BLM/M3wNExLXAXGDYwtt/nNpQI/9N+O9MJWrkcflG/jdTQn5EZg7+QiM+AByWmR+ppo8HDszMs+r6nAacVk2+EXhg0Avpux2BJxq4/kZwm5tDM24zNOd2u80jw+szc0JvnYZq5B3dtL3qW0JmzgPmDdH6N0tELMrMtkbXMZzc5ubQjNsMzbndbnNzGaqzzTuAXeqmpwCPDdG6JElqKkMV3r8Bdo+IaRHxV8AxwE1DtC5JkprKkOw2z8y1EXEW8M/Ufir23cy8dyjWNUi2iN33w8xtbg7NuM3QnNvtNjeRITlhTZIkDR2vsCZJUmEMb0mSCmN414mIT0VERsSOja5lOETElyJiaUQsiYibI2LnRtc01CLiwoi4v9ruGyJi+0bXNNQi4oMRcW9EvBwRI/pnNRFxWEQ8EBHLI+Jzja5nOETEdyPi8Yi4p9G1DJeI2CUibo2IZdW/7bMbXdNwM7wrEbELtcu5/qHRtQyjCzNz38xsBX4KfKHRBQ2DW4C9M3Nf4HfAOQ2uZzjcA/w3YGGjCxlKdZdlfg+wJ/DhiNizsVUNiyuAwxpdxDBbC3wyM98MzATObJLPej3D+xVfBz7DBheTGcky85m6yW1ogm3PzJszc201+Wtq1yAY0TJzWWY28gqGw2X9ZZkz80Wg67LMI1pmLgSebHQdwykzV2bm3dXzZ4FlwOTGVjW8vJ83EBFHAisy8z8iurs43MgVERcAJwBPA29vcDnD7WTgukYXoUEzGXi0broDOLBBtWiYRMRUYDpwZ2MrGV5NE94R8S/A67p56TzgXODdw1vR8NjUdmfmjZl5HnBeRJwDnAWcP6wFDoHetrnqcx61XW/XDGdtQ6Uv29wEer0ss0aWiBgL/Bj42AZ7Eke8pgnv
zHxnd+0RsQ8wDegadU8B7o6IGZn5x2EscUj0tN3d+AHwM0ZAePe2zRFxInAEMCdHyIUONuNzHsm8LHMTiYgWasF9TWb+pNH1DLemCe+eZOZvgYld0xHxMNCWmSPtTjUbiYjdM/PBavJI4P5G1jMcIuIw4LPA2zLz+UbXo0G1/rLMwApql2X+m8aWpKEQtZHW5cCyzLy40fU0giesNbevRMQ9EbGU2mGDZvi5xTeBbYFbqp/I/e9GFzTUIuL9EdEBzAJ+FhH/3OiahkJ1ImLXZZmXAfO38MsyD4qI+CHwK+CNEdEREac0uqZhcBBwPPCO6r/jJRHx3kYXNZy8PKokSYVx5C1JUmEMb0mSCmN4S5JUGMNbkqTCGN6SJBXG8JYkqTCGtyRJhfn/vfU2Ql1bIfAAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7ff49a011d30>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#在训练集上观察预测残差的分布，看是否符合模型假设：噪声为0均值的高斯噪声\n",
    "f, ax = plt.subplots(figsize=(7, 5)) \n",
    "f.tight_layout() \n",
    "ax.hist(y_train - y_train_pred_lr,bins=40, label='Residuals Linear', color='b', alpha=.5); \n",
    "ax.set_title(\"Histogram of Residuals\") \n",
    "ax.legend(loc='best');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
